/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "Encoder.hpp"
#include "../../Frontend/IRToString.hpp"
#include "../../IR/Kernel.hpp"
#include "../../IR/SWSBSetter.hpp"
#include "../../Models/Models.hpp"
#include "../../Timer/Timer.hpp"
#include "../../bits.hpp"
#include "../../strings.hpp"
#include "IGAToGEDTranslation.hpp"

#include <cstring>

using namespace iga;


// Translates a GED status code into a short, human-readable description for
// diagnostics. Any status without a dedicated case maps to "other error".
static const char *gedReturnValueToString(GED_RETURN_VALUE rv) {
  const char *text = "other error";
  switch (rv) {
  case GED_RETURN_VALUE_SUCCESS:
    text = "success";
    break;
  case GED_RETURN_VALUE_CYCLIC_DEPENDENCY:
    text = "cyclic dependency";
    break;
  case GED_RETURN_VALUE_NULL_POINTER:
    text = "null pointer";
    break;
  case GED_RETURN_VALUE_OPCODE_NOT_SUPPORTED:
    text = "unsupported opcode";
    break;
  case GED_RETURN_VALUE_NO_COMPACT_FORM:
    text = "no compact form";
    break;
  case GED_RETURN_VALUE_INVALID_FIELD:
    text = "invalid field";
    break;
  case GED_RETURN_VALUE_INVALID_VALUE:
    text = "invalid value";
    break;
  case GED_RETURN_VALUE_INVALID_INTERPRETER:
    text = "invalid interpreter";
    break;
  default:
    break;
  }
  return text;
}
// Reports (via errorT) that a GED field setter failed.
//   line   - source line of the failing GED_Set* call
//   setter - field name; printed after the "GED_Set" prefix
//   status - GED status code, rendered as text in the message
void Encoder::handleGedError(int line, const char *setter,
                             GED_RETURN_VALUE status) {
  const char *const reason = gedReturnValueToString(status);
  errorT("IGALibrary/GED/Encoder.cpp:", line, ": GED_Set", setter, ": ",
         reason);
}

// Builds an encoder for `model` that reports problems to `errHandler`.
// A private copy of `opts` is kept; when the caller left the SWSB encoding
// mode unspecified, the copy is filled in with the model's mode.
Encoder::Encoder(const Model &model, ErrorHandler &errHandler,
                 const EncoderOpts &opts)
    : GEDBitProcessor(model, errHandler), m_opts(opts),
      m_numberInstructionsEncoded(0), m_mem(nullptr) {
  const bool swsbModeUnset =
      opts.swsbEncodeMode == SWSB_ENCODE_MODE::SWSBInvalidMode;
  if (swsbModeUnset) {
    // fall back to the platform's SWSB encoding mode
    m_opts.swsbEncodeMode = model.getSWSBEncodeMode();
  }
}

// Runs the pre-encoding pass over `k`; simply forwards to
// doEncodeKernelPreProcess.
void Encoder::encodeKernelPreProcess(Kernel &k) {
  doEncodeKernelPreProcess(k);
}

// Pre-encoding pass: runs the SWSB analyzer over the kernel when automatic
// dependency setting is enabled and the platform is XE or later. Otherwise
// this is a no-op.
void Encoder::doEncodeKernelPreProcess(Kernel &k) {
  const bool runSwsbAnalysis =
      m_opts.autoDepSet && platform() >= Platform::XE;
  if (!runSwsbAnalysis) {
    return;
  }

  SWSBAnalyzer analyzer(k, errorHandler(), m_opts.swsbEncodeMode,
                        m_opts.sbidCount);
  analyzer.run();
}

// Returns the value of IGA timer `idx` as reported by getIGATimerUS.
double Encoder::getElapsedTimeMicros(unsigned int idx) {
  const double micros = getIGATimerUS(idx);
  return micros;
}

// Returns the value of IGA timer `idx` as reported by getIGATimerTicks.
int64_t Encoder::getElapsedTimeTicks(unsigned int idx) {
  const int64_t ticks = getIGATimerTicks(idx);
  return ticks;
}

// Returns the name of IGA timer `idx` as reported by getIGATimerNames.
std::string Encoder::getTimerName(unsigned int idx) {
  std::string timerName = getIGATimerNames(idx);
  return timerName;
}

// Returns the instruction count recorded by the most recent encodeKernel
// call (zero right after construction).
size_t Encoder::getNumInstructionsEncoded() const {
  const size_t encodedCount = m_numberInstructionsEncoded;
  return encodedCount;
}

// Encodes all blocks of kernel `k` into a byte buffer allocated from `mem`.
//
//   k       - kernel to encode; its model's platform must match the encoder's
//   mem     - allocator providing the output buffer
//   bits    - [out] set to the start of the encoded buffer on success
//   bitsLen - [out] set to the number of bytes actually used (the final PC)
//
// Steps: run the pre-processing pass, allocate one uncompacted slot per
// instruction, encode each block, patch jump offsets, apply GED workarounds
// and finally zero the unused tail of the buffer.
//
// On an allocation failure or a fatal error while encoding a block the
// function returns early and leaves `bits` / `bitsLen` untouched.  Unless
// IGA_DISABLE_ENCODER_EXCEPTIONS is defined, an iga::FatalError thrown along
// the way is caught here and not propagated.
//
// NOTE(review): the buffer is sized from the instruction count taken before
// encodeBlock runs, while encodeBlock can emit additional sync nops for
// instructions carrying CACHELINEALIGN; confirm the encoded size can never
// exceed allocLen (the trailing memset computes allocLen - bitsLen).
void Encoder::encodeKernel(Kernel &k, MemManager &mem, void *&bits,
                           uint32_t &bitsLen) {
#ifndef IGA_DISABLE_ENCODER_EXCEPTIONS
  try {
#endif
    initIGATimer();
    setIGAKernelName("test");
    IGA_ASSERT(k.getModel().platform == platform(),
               "kernel/encoder model mismatch");

    encodeKernelPreProcess(k);
    // reset per-kernel state
    m_needToPatch.clear();
    m_mem = &mem;
    m_numberInstructionsEncoded = k.getInstructionCount();
    // reserve a full (uncompacted) slot for every instruction
    size_t allocLen = m_numberInstructionsEncoded * UNCOMPACTED_SIZE;
    if (allocLen == 0) // for empty kernel case
      allocLen = 4;
    m_instBuf = (uint8_t *)mem.alloc(allocLen);
    if (!m_instBuf) {
      fatalAtT(0, "failed to allocate memory for kernel binary");
      return;
    }

    for (auto blk : k.getBlockList()) {
      START_ENCODER_TIMER();
      encodeBlock(k, blk);
      STOP_ENCODER_TIMER();
      if (hasFatalError()) {
        // give up on the first fatal error; outputs are not written
        return;
      }
    }
    // resolve the label operands recorded in m_needToPatch
    START_ENCODER_TIMER();
    patchJumpOffsets();
    STOP_ENCODER_TIMER();

    // setting actual size
    bitsLen = currentPc();
    bits = m_instBuf;

    applyGedWorkarounds(k, currentPc());

    // clear any padding
    memset(m_instBuf + bitsLen, 0, allocLen - bitsLen);
#ifndef IGA_DISABLE_ENCODER_EXCEPTIONS
  } catch (const iga::FatalError &) {
    // error is already reported
  }
#endif
}

// Emits an inline-binary instruction verbatim.  The four 4-byte elements of
// the instruction's inline binary are written in reverse element order
// (element 3 first, element 0 last) at the current PC, which is then
// advanced by 16 bytes.
void Encoder::encodeInlineBinaryInst(Instruction &inst) {
  setEncodedPC(&inst, currentPc());

  uint8_t *const outBase = m_instBuf + currentPc();
  for (int dw = 0; dw < 4; ++dw) {
    const int srcIx = 3 - dw; // walk the source elements back to front
    uint8_t *from = (uint8_t *)&inst.getInlineBinary().at(srcIx);
    uint8_t *to = outBase + dw * 4;
    memcpy_s(to, 4, from, 4);
  }

  // Inline-binary-instruction must have size 16
  advancePc(16);
}

// Encodes every instruction of `blk` at the current PC, recording the block's
// starting offset in m_blockToOffsetMap first.
//
// Per instruction:
//   * if it carries CACHELINEALIGN, sync nops (created through `k`) are
//     encoded and inserted before it until the 32 bytes starting at the
//     current PC lie within a single 64-byte line;
//   * inline-binary instructions are copied out verbatim;
//   * everything else is lowered into m_gedInst via encodeInstruction and
//     then written in compacted (8 byte) or native (16 byte) form.
//
// Returns early, without encoding the remaining instructions, as soon as a
// fatal error has been recorded.
void Encoder::encodeBlock(Kernel &k, Block *blk) {
  m_blockToOffsetMap[blk] = currentPc();

  InstList &instList = blk->getInstList();
  const auto instListEnd = instList.end();
  for (auto instIter = instList.begin(); instIter != instListEnd; ++instIter) {
    Instruction *inst = *instIter;

    if (inst->hasInstOpt(InstOpt::CACHELINEALIGN)) {
      // pad with 16-byte sync nops while [pc, pc + 31] would straddle a
      // 64-byte boundary
      while (currentPc() / 64 != (currentPc() + 31) / 64) {
        SWSB swsb(SWSB::DistType::NO_DIST, SWSB::TokenType::NOTOKEN, 0, 0);
        Instruction *syncInst = k.createSyncNopInstruction(swsb);
        setCurrInst(syncInst);
        encodeInstruction(*syncInst);
        if (hasFatalError()) {
          return;
        }
        setEncodedPC(syncInst, currentPc());
        GED_RETURN_VALUE status = GED_RETURN_VALUE_SIZE;
        status = GED_EncodeIns(&m_gedInst, GED_INS_TYPE_NATIVE,
                               m_instBuf + currentPc());
        if (status != GED_RETURN_VALUE_SUCCESS) {
          // the failure is attributed to the aligned instruction's location
          errorAtT(inst->getLoc(), "GED unable to encode instruction: ",
                   gedReturnValueToString(status));
        }
        blk->insertInstBefore(instIter, syncInst);
        advancePc(16);
      }
    }

    setCurrInst(inst);

    if (inst->isInlineBinaryInstruction()) {
      encodeInlineBinaryInst(*inst);
      continue;
    }

    encodeInstruction(*inst);
    if (hasFatalError()) {
      return;
    }
    setEncodedPC(inst, currentPc());

    // Starts as a non-success value so that the native encoding below runs
    // whenever compaction is not attempted.
    GED_RETURN_VALUE status = GED_RETURN_VALUE_SIZE;

    // Compaction policy, in priority order:
    //   1. if -Xforce-no-compact is set, do not compact any instruction;
    //   2. otherwise, if {NoCompact} is set, do not compact the instruction;
    //   3. otherwise, if {Compacted} is set on the instruction, try to
    //      compact it and report an error (or warning) on failure;
    //   4. otherwise, with no compaction setting on the instruction, try to
    //      compact it if -Xauto-compact is enabled;
    //   5. otherwise, do not compact the instruction.
    bool mustCompact = inst->hasInstOpt(InstOpt::COMPACTED);
    bool mustNotCompact = inst->hasInstOpt(InstOpt::NOCOMPACT);
    if (m_opts.forceNoCompact) {
      mustCompact = false;
      mustNotCompact = true;
    }

    // encoded length in bytes: 16 native, 8 compacted
    int32_t iLen = 16;
    if (mustCompact || (!mustNotCompact && m_opts.autoCompact)) {
      // try compact first
      status = GED_EncodeIns(&m_gedInst, GED_INS_TYPE_COMPACT,
                             m_instBuf + currentPc());
      if (status == GED_RETURN_VALUE_SUCCESS) {
        // If auto compaction is turned on, in case we need to patch later.
        inst->addInstOpt(InstOpt::COMPACTED);
        iLen = 8;
      } else if (status == GED_RETURN_VALUE_NO_COMPACT_FORM) {
        if (mustCompact) {
          if (m_opts.explicitCompactMissIsWarning) {
            warningAtT(inst->getLoc(), "GED unable to compact instruction");
          } else {
            errorAtT(inst->getLoc(), "GED unable to compact instruction");
          }
        }
      } // else: some other error (unreachable?)
    }

    // try native encoding if compaction failed (or was never attempted)
    if (status != GED_RETURN_VALUE_SUCCESS) {
      inst->removeInstOpt(InstOpt::COMPACTED);
      status = GED_EncodeIns(&m_gedInst, GED_INS_TYPE_NATIVE,
                             m_instBuf + currentPc());
      if (status != GED_RETURN_VALUE_SUCCESS) {
        errorAtT(inst->getLoc(), "GED unable to encode instruction: ",
                 gedReturnValueToString(status));
      }
    }
    advancePc(iLen);
  }
}

// Looks up the offset recorded for block `b` when it was encoded.
// Returns true and stores the offset in `pc` if the block is known;
// returns false and leaves `pc` unchanged otherwise.
bool Encoder::getBlockOffset(const Block *b, uint32_t &pc) {
  const auto entry = m_blockToOffsetMap.find(b);
  if (entry == m_blockToOffsetMap.end()) {
    return false;
  }
  pc = entry->second;
  return true;
}

// Records the PC at which `inst` is encoded by storing it on the instruction
// itself.  The preprocessor-disabled branch is an alternative that keeps the
// PC in a side table (m_instPcs) instead.
void Encoder::setEncodedPC(Instruction *inst, int32_t encodedPC) {
#if 0
    auto iter = m_instPcs.find(inst);
    IGA_ASSERT(iter == m_instPcs.end(), "resetting encode PC");
    ((void)iter); // dummy use where ASSERT_USER compiles out
    m_instPcs[inst] = encodedPC;
#else
  inst->setPC(encodedPC);
#endif
}

// Returns the PC previously recorded for `inst` (read back from the
// instruction itself).  The preprocessor-disabled branch is the matching
// side-table (m_instPcs) lookup.
int32_t Encoder::getEncodedPC(const Instruction *inst) const {
#if 0
    auto iter = m_instPcs.find(inst);
    if (iter == m_instPcs.end()) {
        IGA_ASSERT_FALSE("inst PC not found");
        return 0;
    }
    return iter->second;
#else
  return inst->getPC();
#endif
}

// Encodes the op-specific function control of instruction `i`.  Exactly one
// category is handled, checked in this order: math, bfn, dpas-format ops,
// send-format ops, sync, ops with branch control.  An op that claims a
// subfunction but matches none of these triggers an assertion.
void Encoder::encodeFC(const Instruction &i) {
  const OpSpec &os = i.getOpSpec();

  if (os.is(Op::MATH)) {
    GED_MATH_FC mfc = lowerMathFC(i.getMathFc());
    GED_ENCODE(MathFC, mfc);
    return;
  }

  if (os.is(Op::BFN)) {
    GED_ENCODE(BfnFC, i.getBfnFc().value);
    return;
  }

  if (os.isDpasFormat()) {
    // dpas splits its function control into systolic depth and repeat count
    auto sf = i.getDpasFc();
    auto sdepth = GetDpasSystolicDepth(sf);
    auto rc = GetDpasRepeatCount(sf);
    GED_ENCODE(SystolicDepth, sdepth);
    GED_ENCODE(RepeatCount, rc);
    return;
  }

  if (os.isAnySendFormat()) {
    // on earlier platforms this is stowed in ExDesc
    if (platform() >= Platform::XE) {
      auto sfid = lowerSFID(i.getSendFc());
      GED_ENCODE(SFID, sfid);
    }
    return;
  }

  if (os.is(Op::SYNC)) {
    GED_SYNC_FC wfc = lowerSyncFC(i.getSyncFc());
    GED_ENCODE(SyncFC, wfc);
    return;
  }

  if (os.supportsBranchCtrl()) {
    GED_ENCODE(BranchCtrl, lowerBranchCntrl(i.getBranchCtrl()));
    return;
  }

  if (os.supportsSubfunction()) {
    IGA_ASSERT_FALSE("encoder needs to encode subfunction for this op");
  }
}

// Lowers `inst` into the GED instruction template m_gedInst (no bytes are
// written to the output buffer here).
//
// Order of work: initialize the GED template for the opcode, choose the
// access mode, then set exec size, function control, channel offset, mask
// control, predication, conditional modifier and flag register, dispatch to
// the format-specific operand encoder, and finally encode the instruction
// options.  Instructions with an immediate source that stands for a label
// are queued in m_needToPatch and marked NOCOMPACT.
//
// A fatal error is raised (and the function returns) if the opcode has no GED
// equivalent or GED cannot create the template.
void Encoder::encodeInstruction(Instruction &inst) {
  m_opcode = inst.getOp();
  const auto gedPlat = lowerPlatform(platform());
  const auto gedOp = lowerOpcode(m_opcode);
  if (gedOp == GED_OPCODE_INVALID) {
    fatalAtT(inst.getLoc(), "OpSpec failed to map to a valid GED opcode");
    return;
  }
  GED_RETURN_VALUE status = GED_InitEmptyIns(gedPlat, &m_gedInst, gedOp);
  if (status != GED_RETURN_VALUE_SUCCESS) {
    fatalAtT(inst.getLoc(), "GED failed to create instruction template");
    return;
  }

  if (m_opcode == Op::ILLEGAL) {
    // GED does all the work for this instruction
    return;
  } else if (m_opcode == Op::NOP) {
    // nop supports {Breakpoint}
    encodeOptions(inst);
    return;
  }
  const OpSpec &os = inst.getOpSpec();

  // Dwindling cases where we must use Align16
  // Pre-GEN10 ternary ops are all align16
  bool isTernary = platform() < Platform::GEN10 && os.isTernary();
  bool contextSaveRestoreNeedsAlign16 =
      isAlign16MathMacroRegisterCsrOperand(inst.getDestination()) ||
      isAlign16MathMacroRegisterCsrOperand(inst.getSource(0));
  // IEEE macro instructions (math.invm and math.rsqrtm)
  bool align16MacroInst = m_model.supportsAlign16MacroInst() && inst.isMacro();
  m_encodeAlign16 =
      isTernary || contextSaveRestoreNeedsAlign16 || align16MacroInst;
  GED_ACCESS_MODE accessMode =
      m_encodeAlign16 ? GED_ACCESS_MODE_Align16 : GED_ACCESS_MODE_Align1;
  if (m_model.supportsAccessMode()) {
    GED_ENCODE(AccessMode, accessMode);
  } // else GED will crash given this call (even given Align1)

  ExecSize execSize = inst.getExecSize();
  if (os.isTernary() && m_model.supportsAlign16Ternary() &&
      inst.getExecSize() == ExecSize::SIMD1) {
    // scalar ternary workaround for Align16
    // (c.f. Encoder::encodeTernaryDestinationAlign16)
    // the encoded exec size is widened: SIMD2 for DF destinations, SIMD4
    // otherwise
    execSize = inst.getDestination().getType() == Type::DF ? ExecSize::SIMD2
                                                           : ExecSize::SIMD4;
  }
  GED_ENCODE(ExecSize, lowerExecSize(execSize));

  encodeFC(inst);

  if (os.supportsQtrCtrl()) {
    // NOTE(review): an earlier comment here referred to the (possibly
    // modified) ExecSize from above, but only the instruction's channel
    // offset is read; confirm nothing else was intended.
    GED_CHANNEL_OFFSET qtrCtrl = lowerQtrCtrl(inst.getChannelOffset());
    GED_ENCODE(ChannelOffset, qtrCtrl);
  }

  GED_ENCODE(MaskCtrl, lowerEmask(inst.getMaskCtrl()));

  // Predicate: ops without predication support are encoded with the normal
  // predicate control
  const Predication &pred = inst.getPredication();
  if (os.supportsPredication()) {
    GED_ENCODE(PredCtrl, lowerPredCtrl(pred.function));
  } else {
    GED_ENCODE(PredCtrl, GED_PRED_CTRL_Normal);
  }

  // On XE+ the conditional modifier is skipped when src0 is a 64-bit
  // immediate
  bool isImm64Src0Overlap =
      platform() >= Platform::XE &&
      inst.getSource(0).getKind() == Operand::Kind::IMMEDIATE &&
      TypeIs64b(inst.getSource(0).getType());

  if (!isImm64Src0Overlap && inst.getOpSpec().supportsFlagModifier()) {
    if (os.op == Op::BFN) {
      switch (inst.getFlagModifier()) {
      case FlagModifier::NONE:
      case FlagModifier::EQ:
      case FlagModifier::GT:
      case FlagModifier::LT:
        // GED does the special mapping to CondMod2
        // only a subset of cond modifiers are supported on this op
        GED_ENCODE(CondModifier, lowerCondModifier(inst.getFlagModifier()));
        break;
      default:
        errorT("this instruction format only supports "
               "(eq), (gt), and (lt) conditional modifiers");
      }
    } else {
      GED_ENCODE(CondModifier, lowerCondModifier(inst.getFlagModifier()));
    }
  }

  bool hasFlagRegField = true;
  // For >= XE_HPC, some fields only exist when having CondCtrl or PredCtrl:
  // PredInv, FlagRegNum, FlagSubRegNum
  // In GED, either CondCtrl or PredCtrl have to be set to non-zero before
  // these fields can be set
  if (platform() >= Platform::XE_HPC) {
    hasFlagRegField = (inst.getFlagModifier() != FlagModifier::NONE) ||
                      (pred.function != PredCtrl::NONE) || inst.isBranching();
  }

  if (os.supportsPredication() && hasFlagRegField)
    GED_ENCODE(PredInv,
               pred.inverse ? GED_PRED_INV_Invert : GED_PRED_INV_Normal);

  // flag register: only when the field exists and the instruction names one
  RegRef flagReg = inst.getFlagReg();
  if (hasFlagRegField && (flagReg != REGREF_INVALID)) {
    GED_ENCODE(FlagRegNum, static_cast<uint32_t>(inst.getFlagReg().regNum));
    GED_ENCODE(FlagSubRegNum, inst.getFlagReg().subRegNum);
  }

  // set AccWrEn where supported
  if (inst.hasInstOpt(InstOpt::ACCWREN)) {
    GED_ENCODE(AccWrCtrl, GED_ACC_WR_CTRL_AccWrEn);
  }

  // format-specific operand encoding
  if (os.isBranching()) {
    if (m_model.supportsSimplifiedBranches()) {
      encodeBranchingInstructionSimplified(inst);
    } else {
      encodeBranchingInstruction(inst);
    }
    // options encoded internally
  } else if (os.isTernary()) {
    encodeTernaryInstruction(inst, accessMode);
  } else if (os.isAnySendFormat()) {
    encodeSendInstruction(inst);
  } else if (os.is(Op::SYNC)) {
    encodeSyncInstruction(inst);
  } else {
    encodeBasicInstruction(inst, accessMode);
  }

  if (!hasFatalError()) {
    encodeOptions(inst);

    // setup for back patching on branching ops
    if (os.isBranching() || inst.isMovWithLabel()) {
      bool src0IsLabel = inst.getSource(0).isImm();
      bool src1IsLabel = inst.getSourceCount() > 1 && inst.getSource(1).isImm();
      if (src0IsLabel || src1IsLabel) {
        // remember the instruction, a copy of its GED state and its output
        // location for later patching
        m_needToPatch.emplace_back(&inst, m_gedInst, m_instBuf + currentPc());
        // Force not to compact label instructions to avoid the compaction error
        // when auto-compaction is enabled. We could set this inst to
        // compactable during Encoder::encodeBlock when the value is unknown
        // (and assume to be 0). But we can only compact imm values that use
        // up to 12 bits.
        inst.addInstOpt(InstOpt::NOCOMPACT);
      }
    }
  }
}

void Encoder::encodeBasicInstruction(const Instruction &inst,
                                     GED_ACCESS_MODE accessMode) {
  const OpSpec &os = inst.getOpSpec();

  if (os.isBinaryWithExecDataType())
    GED_ENCODE(ExecutionDataType,
               lowerExecDataType(inst.getDestination().getType()));

  switch (inst.getSourceCount()) {
  case 2:
    encodeBasicSource<SourceIndex::SRC1>(inst, inst.getSource(1), accessMode);
    // vvvv fall through vvvv
  case 1:
    encodeBasicSource<SourceIndex::SRC0>(inst, inst.getSource(0), accessMode);
  }

  if (os.supportsDestination()) {
    encodeBasicDestination(inst, inst.getDestination(), accessMode);
  } else if (os.op == Op::WAIT) {
    // wait has an implicit destination (same as first source)
    // but with dst region of <1>
    Operand copy(inst.getSource(0));
    copy.setRegion(Region::DST1);
    encodeBasicDestination(inst, copy);
  }
}

void Encoder::encodeTernaryDestinationAlign1(const Instruction &inst) {
  const Operand &dst = inst.getDestination();

  if (inst.getOpSpec().supportsSaturation()) {
    GED_ENCODE(Saturate, lowerSaturate(dst.getDstModifier()));
  }
  GED_ENCODE(DstDataType, lowerDataType(dst.getType()));
  GED_ENCODE(DstRegFile, lowerRegFile(dst.getDirRegName()));
  encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);

  if (inst.isMacro()) {
    GED_ENCODE(DstMathMacroExt, lowerMathMacroReg(dst.getMathMacroExt()));
    // GED_ENCODE(DstHorzStride, 1);
  } else {
    GED_ENCODE(DstSubRegNum,
               SubRegToBinaryOffset(dst.getDirRegRef().subRegNum,
                                    dst.getDirRegName(), dst.getType(),
                                    m_model.platform));
    if (inst.getOpSpec().hasDstHorzStride())
      GED_ENCODE(DstHorzStride, static_cast<int>(dst.getRegion().getHz()));
  }
}

// Encodes source operand S of an Align1 ternary instruction.
//
// Raises a fatal error (and returns) on platforms before GEN10, for an
// immediate in src1, for a math-macro operand before GEN11, and for operand
// kinds other than direct, macro or immediate.
//
// Dpas-format instructions take a dedicated path that sets the data type
// (src0) or sub-byte precision (src1/src2) followed by register file,
// register and subregister, and then returns.
template <SourceIndex S>
void Encoder::encodeTernarySourceAlign1(const Instruction &inst) {
  // CNL+ align1 ternary
  if (platform() < Platform::GEN10) {
    fatalT("src", (int)S,
           ": align1 ternary is not supported on this "
           "platform");
    return;
  }

  const Operand &src = inst.getSource(S);
  Type srcType = src.getType();
  // DPAS
  if (inst.getOpSpec().isDpasFormat()) {
    // src0's type is the type for all sources

    if (S == SourceIndex::SRC0) {
      GED_ENCODE(Src0DataType, lowerDataType(srcType));
      // GED: src0 HS = 0, VS=3
    } else if (S == SourceIndex::SRC1) {
      GED_ENCODE(Src1Precision, lowerSubBytePrecision(srcType));
      // GED sets both the type and the precision at the same time for us
      // via this higher-level API
      // GED: src1 HS = 1, VS=3
    } else if (S == SourceIndex::SRC2) {
      GED_ENCODE(Src2Precision, lowerSubBytePrecision(srcType));
      // GED sets both the type and the precision at the same time for us
      // via this higher-level API
      // GED: src2 HS = 3
    }
    encodeSrcRegFile<S>(lowerRegFile(src.getDirRegName()));
    encodeSrcReg<S>(src.getDirRegName(), src.getDirRegRef().regNum);
    encodeSrcSubRegNum<S>(SubRegToBinaryOffset(src.getDirRegRef().subRegNum,
                                               src.getDirRegName(), srcType,
                                               m_model.platform));

    return;
  }

  // GED will catch any mismatch between float and int (illegal mixed mode)
  encodeSrcType<S>(srcType); // GED dependency requires type before reg file

  switch (src.getKind()) {
  case Operand::Kind::DIRECT:
  case Operand::Kind::MACRO: {
    encodeSrcRegFile<S>(lowerRegFile(src.getDirRegName()));

    // the address mode is only encoded up to GEN11
    if (platform() <= Platform::GEN11) {
      encodeSrcAddrMode<S>(GED_ADDR_MODE_Direct);
    }

    // source modifiers
    if (inst.getOpSpec().supportsSourceModifiers()) {
      encodeSrcModifier<S>(src.getSrcModifier());
    }

    // regioning
    //
    // ternary align1 puts SpcAcc into subreg, so regions may be set
    // in all cases
    // * madm doesn't have a region in GEN9 ...
    //   it does in GEN10+, but we haven't supported it in syntax yet
    //   and leave it to GED to set it
    // * src0 and src1 only have <w;h>, src2 only has <h>
    bool hasSrcRgnHz = !inst.isMacro();
    bool hasSrcRgnVt = !inst.isMacro() && S < SourceIndex::SRC2;
    bool hasReducedRegion =
        m_model.srcHasReducedRegion(static_cast<uint32_t>(S));
    auto rgn = src.getRegion();
    if (hasReducedRegion) {
      encodeSrcReducedRegion<S>(rgn);
    } else {
      if (hasSrcRgnHz)
        encodeSrcRegionHorz<S>(rgn.getHz());
      if (hasSrcRgnVt)
        encodeTernarySrcRegionVert(S, rgn.getVt());
    }
    // register and subregister
    encodeSrcReg<S>(src.getDirRegName(), src.getDirRegRef().regNum);
    if (inst.isMacro()) {
      if (platform() < Platform::GEN11) {
        fatalT("src", (int)S, ": math macro operands require Align16");
        return;
      }
      encodeSrcMathMacroReg<S>(src.getMathMacroExt());
      // macro operands get a fixed region (vertical 4 for src0/src1,
      // horizontal 1) unless the model uses reduced regions for this source
      if (!hasReducedRegion) {
        if (S != SourceIndex::SRC2) {
          encodeTernarySrcRegionVert(S, Region::Vert::VT_4);
        }
        encodeSrcRegionHorz<S>(Region::Horz::HZ_1);
      }
    } else {
      auto subReg = SubRegToBinaryOffset(src.getDirRegRef().subRegNum,
                                         src.getDirRegName(), src.getType(),
                                         m_model.platform);
      encodeSrcSubRegNum<S>(subReg);
    }
    break;
  }
  case Operand::Kind::IMMEDIATE:
    if (S == SourceIndex::SRC1) {
      fatalT("src1: immediate operand in ternary align1 must be "
             "src0 or src2");
      return;
    }
    encodeSrcRegFile<S>(GED_REG_FILE_IMM);
    // NOTE(review): the pre-GEN10 branch looks unreachable because of the
    // platform guard at the top of this function - confirm before removing.
    if (platform() < Platform::GEN10) {
      encodeImmVal(src.getImmediateValue(), src.getType());
    } else {
      encodeTernaryImmVal<S>(src.getImmediateValue(), src.getType());
    }
    break;
  default:
    fatalT("src", (int)S, ": invalid operand kind");
    return;
  }
}

// Dispatches a ternary instruction to the Align1 or Align16 operand encoder
// according to `accessMode` (anything other than Align1 is treated as
// Align16).
void Encoder::encodeTernaryInstruction(const Instruction &inst,
                                       GED_ACCESS_MODE accessMode) {
  const bool useAlign16 = accessMode != GED_ACCESS_MODE_Align1;
  if (useAlign16) {
    encodeTernaryAlign16Instruction(inst);
    return;
  }
  encodeTernaryAlign1Instruction(inst);
}
// Encodes the operands of an Align16 ternary instruction: the destination
// (when the op has one) followed by src0, src1 and src2.
void Encoder::encodeTernaryAlign16Instruction(const Instruction &inst) {
  const bool hasDst = inst.getOpSpec().supportsDestination();
  if (hasDst) {
    encodeTernaryDestinationAlign16(inst);
  }

  encodeTernarySourceAlign16<SourceIndex::SRC0>(inst);
  encodeTernarySourceAlign16<SourceIndex::SRC1>(inst);
  encodeTernarySourceAlign16<SourceIndex::SRC2>(inst);
}
void Encoder::encodeTernaryAlign1Instruction(const Instruction &inst) {
  // set ExecutionDataType (integral or floating)
  // the operands must be part of the same type set
  Type src0Type = inst.getSource(0).getType();
  GED_EXECUTION_DATA_TYPE execDataType;
  if (isTernaryAlign1Floating(src0Type)) {
    execDataType = GED_EXECUTION_DATA_TYPE_Float;
  } else if (isTernaryAlign1Integral(src0Type)) {
    execDataType = GED_EXECUTION_DATA_TYPE_Integer;
  } else {
    fatalT("src0: unsupported type for ternary align1 encoding");
    return;
  }
  GED_ENCODE(ExecutionDataType, execDataType);

  if (inst.getOpSpec().supportsDestination()) {
    encodeTernaryDestinationAlign1(inst);
  }
  encodeTernarySourceAlign1<SourceIndex::SRC0>(inst);
  encodeTernarySourceAlign1<SourceIndex::SRC1>(inst);
  encodeTernarySourceAlign1<SourceIndex::SRC2>(inst);
}

// Encodes the operands of a control-flow instruction for models that do not
// use the simplified branch encoding (see the dispatch in encodeInstruction).
//
// Four opcode classes are handled (opcode taken from m_opcode):
//   * jmpi            - implicit ip destination and src0; target goes to src1
//   * call/calla/ret  - may take register operands
//   * brd/brc         - ip destination; src0 is a label or a register
//   * everything else - null destination, immediate operands only
//
// Label operands only get their register file / data type set here; the
// offsets themselves are filled in later by the patching step.
void Encoder::encodeBranchingInstruction(const Instruction &inst) {
  // the destination stride is always 1 for all control flow
  GED_ENCODE(DstHorzStride, 1);

  // control flow instructions require patching later if any operand is a label
  bool src0IsLabel = inst.getSource(0).getKind() == Operand::Kind::LABEL;

  // break up instructions into various classes
  //   - stuff with implicit operands: jmpi
  //   - stuff that can take register operands: call, calla, return
  //   - everything else: if, else, while, ..., goto, join, ...
  if (m_opcode == Op::JMPI) {
    // jmpi encodes the syntax
    //   jmpi (1) LABEL
    //   jmpi (1) reg32
    // as
    //   jmpi (1) ip ip LABEL
    //   jmpi (1) ip ip reg32
    //
    // "Restriction: The index data type must be D (Signed DWord Integer)."
    //
    // implicit IP ...
    encodeBasicDestination(inst, Operand::DST_REG_IP_UD);
    encodeBasicSource<SourceIndex::SRC0>(inst, Operand::SRC_REG_IP_UD);
    GED_ENCODE(Src1DataType, GED_DATA_TYPE_d);
    if (src0IsLabel) {
      // jmpi (1) LABEL   (encodes into Src1)
      GED_ENCODE(Src1RegFile, GED_REG_FILE_IMM);
    } else {
      // jmpi (1) reg32   (encodes into Src1)
      encodeBasicSource<SourceIndex::SRC1>(inst, inst.getSource(0));
    }
  } else if (m_opcode == Op::CALL || m_opcode == Op::CALLA ||
             m_opcode == Op::RET) {
    // e.g. call, calla, ret
    //   call  (..)   imm32
    //   call  (..)   reg32
    //   calla (..)   imm32
    //   ret   (...)  reg32       => encodes as ret (...) null reg
    //
    if (m_opcode == Op::CALL || m_opcode == Op::CALLA) {
      encodeBasicDestination(inst, inst.getDestination());
    } else if (m_opcode == Op::RET) {
      encodeBasicDestination(inst, Operand::DST_REG_NULL_UD);
      encodeBasicSource<SourceIndex::SRC0>(inst, inst.getSource(0));
    }

    if (m_opcode == Op::CALL || m_opcode == Op::CALLA) {
      if (src0IsLabel) {
        // op == CALL (since it's a label), hence we have
        // call (..) imm32 => which uses src1
        GED_ENCODE(Src1RegFile, GED_REG_FILE_IMM);
        GED_ENCODE(Src1DataType, GED_DATA_TYPE_d);
      } else {
        // call  (..) reg32
        // calla (..) imm32
        encodeBasicSource<SourceIndex::SRC1>(inst, inst.getSource(0));
      }

      // The <2;2,1> restriction for CALL and CALLA is only for IVB+HSW, but
      // the simulator has it until CNL.  So we have to support it until CNL
      // HW validation moves to it
      if (callNeedsSrc0Region221(inst)) {
        GED_ENCODE(Src0VertStride, 2);
        GED_ENCODE(Src0Width, 2);
        GED_ENCODE(Src0HorzStride, 1);
      }
      // though it's not stated in the spec, ICL requires src0 region be set to
      // <2;4,1>
      else if (callNeedsSrc0Region241(inst)) {
        GED_ENCODE(Src0VertStride, 2);
        GED_ENCODE(Src0Width, 4);
        GED_ENCODE(Src0HorzStride, 1);
      }
    }
  } else if (m_opcode == Op::BRD || m_opcode == Op::BRC) {
    // [brd/brc]: The ip register must be used (for example, by the assembler)
    // as dst.
    encodeBasicDestination(inst, Operand::DST_REG_IP_D);
    if (!src0IsLabel) {
      encodeBasicSource<SourceIndex::SRC0>(inst, inst.getSource(0));
    }
    // operand type is :w before GEN8 and :d from GEN8 on
    GED_DATA_TYPE ty =
        platform() < Platform::GEN8 ? GED_DATA_TYPE_w : GED_DATA_TYPE_d;
    GED_ENCODE(Src0RegFile, src0IsLabel ? GED_REG_FILE_IMM : GED_REG_FILE_GRF);
    GED_ENCODE(Src0DataType, ty);
    // GED automatically sets?
    // if (m_opcode == Op::BRC && src0IsLabel) {
    //    GED_ENCODE(Src1DataType, ty);
    // }
    // if (m_opcode == Op::BRD && m_inst->getOpSpec().hasImplicitSrcRegion(0)) {
    //     encodeSrcRegion(SourceIndex::SRC0,
    //         m_inst->getOpSpec().implicitSrcRegion(0));
    // }
  } else {
    // regular control flow that only accepts immediate values
    // e.g. if, else, endif, while, cont, break, goto, join, halt

    // Apparently, the implicit destination on these instructions
    // is null instead of ip (unlike jmpi etc)
    // destination is ip<1>:ud
    //
    // encodeDestination(&Operand::DST_IP);
    encodeBasicDestination(inst, Operand::DST_REG_NULL_UD);

    // UIP
    if (m_opcode != Op::ENDIF && m_opcode != Op::WHILE &&
        m_opcode != Op::JOIN) {
      // if/else/halt/brk/cont.... all require :d on operands
      // (:w before GEN8)
      GED_DATA_TYPE ty =
          platform() < Platform::GEN8 ? GED_DATA_TYPE_w : GED_DATA_TYPE_d;
      GED_ENCODE(Src0RegFile, GED_REG_FILE_IMM);
      GED_ENCODE(Src0DataType, ty);
    }
    // before XE don't need to set JIP for control flow instructions that have
    // UIP JIP
    // endif/while/join place their immediate in src1
    if (m_opcode == Op::WHILE || m_opcode == Op::ENDIF ||
        m_opcode == Op::JOIN) {
      GED_ENCODE(Src1RegFile, GED_REG_FILE_IMM);
      GED_ENCODE(Src1DataType, GED_DATA_TYPE_d);
    }
  }
}

// Encodes the operands of a control-flow instruction on models that use the
// simplified branch encoding: branch control (where supported), the
// destination (null when the op has none), src0 as either an immediate
// placeholder for a label or a register, and an immediate register file for
// the second source where one exists.
void Encoder::encodeBranchingInstructionSimplified(const Instruction &inst) {
  const OpSpec &os = inst.getOpSpec();
  const Operand &src0 = inst.getSource(0);

  // set branch control
  if (os.supportsBranchCtrl()) {
    GED_ENCODE(BranchCtrl, lowerBranchCntrl(inst.getBranchCtrl()));
  }

  // for jmpi HW will take care of IP so don't need to encode it for dst/src0
  if (os.supportsDestination()) {
    encodeBranchDestination(inst.getDestination());
  } else {
    encodeBranchDestination(Operand::DST_REG_NULL_UD);
  }

  // encoding JIP
  // control flow instructions require patching later if any operand is a
  // label, so a label only gets its register file set here
  if (src0.getKind() != Operand::Kind::LABEL) {
    // jmpi, call, brc, ... with a register argument
    if (src0.getKind() == Operand::Kind::INDIRECT) {
      errorT("branch instructions forbid indirect register mode");
    }
    encodeBranchSource(src0);
  } else {
    // regular control flow that only accepts immediate values
    // e.g. if, else, endif, while, cont, break, goto, join, halt
    GED_ENCODE(Src0RegFile, GED_REG_FILE_IMM);
  }

  // encoding UIP: always IMM except for brc with a register argument
  const bool uipIsImm = inst.getSourceCount() == 2 &&
                        (inst.getOp() != Op::BRC || src0.isImm());
  if (uipIsImm) {
    GED_ENCODE(Src1RegFile, GED_REG_FILE_IMM);
  }
}

// Encodes a send-family instruction: operands first, then the descriptors,
// then the send-specific options (fusion control and EOT).
void Encoder::encodeSendInstruction(const Instruction &i) {
  // ---- operands ----
  const OpSpec &os = i.getOpSpec();
  encodeSendDestination(i.getDestination());
  if (!os.isSendsFormat()) {
    encodeSendSource0(i.getSource(0));
    // src1 is only present when the model supports the Xe send format
    if (m_model.supportsXeSend()) {
      encodeSendsSource1(i.getSource(1));
    }
  } else {
    // old sends sources get special treatment
    encodeSendsSource0(i.getSource(0));
    encodeSendsSource1(i.getSource(1));
  }

  // ---- descriptors ----
  encodeSendDescs(i);

  // ---- options ----
  // FusionCtrl exists from XE on, but is removed from XeHPC+
  if (platform() >= Platform::XE && platform() < Platform::XE_HPC) {
    const auto fusionCtrl = i.hasInstOpt(InstOpt::SERIALIZE)
                                ? GED_FUSION_CTRL_Serialized
                                : GED_FUSION_CTRL_Normal;
    GED_ENCODE(FusionCtrl, fusionCtrl);
  }

  if (i.hasInstOpt(InstOpt::EOT)) {
    GED_ENCODE(EOT, GED_EOT_EOT);
  }
} // end: encodeSendInstruction

void Encoder::encodeSendDescs(const Instruction &i) {
  if (platform() < Platform::XE) {
    encodeSendDescsPreXe(i);
  } else if (platform() == Platform::XE) {
    encodeSendDescsXe(i);
  } else if (platform() == Platform::XE_HP) {
    encodeSendDescsXeHP(i);
  } else if (platform() == Platform::XE_HPG || platform() == Platform::XE_HPC
  ) {
    encodeSendDescsXeHPG(i);
  } else if (platform() >= Platform::XE2) {
    encodeSendDescsXe2(i);
  } else {
    errorT("unsupported platform");
  }

  bool noEOTinExDesc = m_model.supportsXeSend();
  if (noEOTinExDesc && i.getExtMsgDescriptor().isImm() &&
      (i.getExtMsgDescriptor().imm & 1 << 5))
    errorT("Encoder: Send exDesc[5] must not be set (the legacy EOT bit)");
}

void Encoder::encodeSendDescsPreXe(const Instruction &i) {
  SendDesc exDesc = i.getExtMsgDescriptor();
  const OpSpec &os = i.getOpSpec();
  if (exDesc.isReg()) {
    if (os.isSendFormat()) {
      errorT("unary send forbids register ExDesc");
    }
    GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
    GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
  } else {
    GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
    GED_ENCODE(ExMsgDescImm, exDesc.imm);
  }

  SendDesc desc = i.getMsgDescriptor();
  if (desc.isReg()) {
    if (platform() == Platform::GEN9) {
      uint32_t msgDescriptor = 0;
      // There is a HW bug on SKL where HW will only copy bits 0-28 from
      // the address register (descriptor register) and will miss bit 30
      // of the descriptor.  Hence, even in the case of an register
      // descriptor we must program bit 30 as immediate (it will be
      // taken from the encoding and OR'd in correctly)
      //
      // E.g. (old syntax)
      //   sends (8) r74:hf r16 r73 0x42:ud a0.0 {Align1, Q1, NoMask}
      //       // sampler, resLen=3, msgLen=1, extMsgLen=1
      // On SKL, HW will copy bits 29-31 from the actual immediate
      // descriptor bits.  Hence, we must set immediate descriptor
      // bit 30 even in the case of a register descriptor. (For SKL).
      //
      // For 3D sampler bit 30 indicates HF/F return format.
      // For render target write bit 30 indicates HF/F input...
      // Thankfully for SKL the 3D sampler doesn't support HF input.
      // For CNL it does, and that will be bit 29.
      // But this bug should be fixed in CNL.
      if (platform() == Platform::GEN9 && desc.isReg()) {
        if (i.getDestination().getType() == Type::HF ||
            i.getSource(0).getType() == Type::HF) {
          msgDescriptor |= (1 << 30);
        }
      }
      GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
      GED_ENCODE(MsgDesc, msgDescriptor);
    }
    GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
    const RegInfo *ri = m_model.lookupRegInfoByRegName(RegName::ARF_A);
    IGA_ASSERT(ri, "failed to find a0 register");
    GED_ENCODE(DescRegNum, ri->encodeARFRegNum((int)desc.reg.regNum));
  } else if (desc.isImm()) {
    GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
    GED_ENCODE(MsgDesc, desc.imm);
  }
}
void Encoder::encodeSendDescsXe(const Instruction &i) {
  SendDesc exDesc = i.getExtMsgDescriptor();
  if (exDesc.isReg()) {
    GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
    GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
  } else {
    GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
    GED_ENCODE(ExMsgDescImm, exDesc.imm);
  }

  SendDesc desc = i.getMsgDescriptor();
  if (desc.isReg()) {
    GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
    // a0.0 is implied (there's no field)
    if (desc.reg.subRegNum != 0) {
      errorT("send with reg desc must be a0.0");
    }
  } else {
    GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
    GED_ENCODE(MsgDesc, desc.imm);
  }
}

// A bit harder than Xe
//   * If ExBSO is set then Src1Length holds xlen
//   * CPS has it's own field (ExDesc[11]) only if ExDesc.IsReg
void Encoder::encodeSendDescsXeHP(const Instruction &i) {
  SendDesc exDesc = i.getExtMsgDescriptor();
  if (exDesc.isReg()) {
    GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
    GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
    GED_ENCODE(ExBSO, i.hasInstOpt(InstOpt::EXBSO) ? 1 : 0);
    if (i.hasInstOpt(InstOpt::EXBSO)) {
      GED_ENCODE(CPS, i.hasInstOpt(InstOpt::CPS) ? 1 : 0);
      GED_ENCODE(Src1Length, (uint32_t)i.getSrc1Length());
    } else if (i.hasInstOpt(InstOpt::CPS)) {
      errorT("{CPS} requires {ExBSO}");
    }
  } else {
    if (i.hasInstOpt(InstOpt::CPS)) {
      warningT("when ExDesc is immediate use ExDesc[11] rather than {CPS}");
      exDesc.imm |= 1 << 11;
    }
    GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
    GED_ENCODE(ExMsgDescImm, exDesc.imm);
  }

  SendDesc desc = i.getMsgDescriptor();
  if (desc.isReg()) {
    GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
    if (desc.reg.subRegNum != 0) { // a0.0 is implied (there's no field)
      errorT("send with reg desc must be a0.0");
    }
  } else {
    GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
    GED_ENCODE(MsgDesc, desc.imm);
  }
}

// Similar to XeHP, except
//    * ExDesc.IsImm implies use of Src1Length (Src.Length is in EU bits)
void Encoder::encodeSendDescsXeHPG(const Instruction &i) {
  SendDesc exDesc = i.getExtMsgDescriptor();
  if (exDesc.isReg()) {
    GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
    GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
    GED_ENCODE(ExBSO, i.hasInstOpt(InstOpt::EXBSO) ? 1 : 0);
    if (i.hasInstOpt(InstOpt::EXBSO)) {
      GED_ENCODE(CPS, i.hasInstOpt(InstOpt::CPS) ? 1 : 0);
      GED_ENCODE(Src1Length, (uint32_t)i.getSrc1Length());
    } else if (i.hasInstOpt(InstOpt::CPS)) {
      errorT("{CPS} requires {ExBSO}");
    }
  } else {
    if (i.hasInstOpt(InstOpt::CPS)) {
      warningT("when ExDesc is immediate use ExDesc[11] rather than {CPS}");
      exDesc.imm |= 1 << 11;
    }
    GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
    GED_ENCODE(ExMsgDescImm, exDesc.imm);
    GED_ENCODE(Src1Length, (uint32_t)i.getSrc1Length());
  }

  SendDesc desc = i.getMsgDescriptor();
  if (desc.isReg()) {
    GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
    if (desc.reg.subRegNum != 0) { // a0.0 is implied (there's no field)
      errorT("send with reg desc must be a0.0");
    }
  } else {
    GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
    GED_ENCODE(MsgDesc, desc.imm);
  }
}

// Encodes both send descriptors (ExDesc and Desc) of instruction `i` for Xe2.
//
// Similar to XeHPG/XeHPC except:
//   * ExBSO doesn't exist for UGM (the bit is the high bit of ExImmOffDesc)
//   * if the ExBSO field exists, it denotes whether Src1Len is in the EU
//     field or in ExDesc
//   * CPS is gone (use ExImmOff[11])
//   * If ExDesc.IsReg, then we also set ExImmOffDesc. For UGM,
//     ExDescImm[15] occupies the same bit that is ExBSO for other SFIDs.
//
// Several of the fields written here overlap in the encoding, so the order
// of the GED_ENCODE calls below is significant (see the inline notes).
void Encoder::encodeSendDescsXe2(const Instruction &i) {
  SendDesc exDesc = i.getExtMsgDescriptor();
  bool hasSrc1Length = true;
  // Gather Send lacks Src1.Len
  // (the same field [103:99] encoding field becomes Src0.SubRegNum)
  if (i.isGatherSend())
    hasSrc1Length = false;
  if (exDesc.isReg()) {
    GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
    // set ExBSO for non-UGM
    // (UGM uses the ExBSO bit for ExDescImm[15])
    uint32_t exImmOffDesc = i.getExtImmOffDescriptor();
    // Each check below reports an error and clears the offending bits so
    // that encoding can continue with a well-formed value.
    if (i.getSendFc() != SFID::UGM) {
      // ExDescImm[15] must not be set for non-UGM, where that bit is ExBSO
      if (exImmOffDesc & (1 << 15)) {
        exImmOffDesc &= ~(1 << 15);
        errorT("ExDescImm[15] overlaps ExBSO for this SFID and must be 0");
      }
    }
    if (exImmOffDesc & (0x7 << 16)) {
      exImmOffDesc &= ~(0x7 << 16);
      errorT("ExDescImm[18:16] overlaps ExDesc.Reg and must be 0");
    }
    if (exImmOffDesc & 0x7FF) {
      exImmOffDesc &= ~0x7FF;
      errorT("ExDescImm[10:0] are unmapped and must be 0");
    }
    GED_ENCODE(ExMsgDescImm, exImmOffDesc);

    if (i.getSendFc() != SFID::UGM) {
      // must be done *after* ExMsgDescImm since this will stomp
      // ExDescImm[15] (we could flip it around and just set ExDescImm[15])
      GED_ENCODE(ExBSO, i.hasInstOpt(InstOpt::EXBSO) ? 1 : 0);
      // Src1Length is only written to the EU field when ExBSO is set
      if (i.hasInstOpt(InstOpt::EXBSO)) {
        GED_ENCODE(Src1Length, (uint32_t)i.getSrc1Length());
      }
    } else {
      // {ExBSO} on UGM is only warned about; it is not encoded
      if (i.hasInstOpt(InstOpt::EXBSO))
        warningAtT(i.getLoc(),
                   "{ExBSO} does not exist for send.ugm on this platform");
      // for UGM, src1.len must be in EU field
      if (hasSrc1Length)
        GED_ENCODE(Src1Length, (uint32_t)i.getSrc1Length());
    }

    if (i.hasInstOpt(InstOpt::CPS)) {
      // this is ExDesc[11]
      errorT("{CPS} does not exist on this platform");
    }

    // has to be set after ExMsgDescImm due to overlap
    GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
  } else {
    // immediate ExDesc: write it as-is; Src1Length goes in the EU field
    // unless this is a gather send (which has no Src1.Len)
    GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
    GED_ENCODE(ExMsgDescImm, exDesc.imm);
    if (hasSrc1Length)
      GED_ENCODE(Src1Length, (uint32_t)i.getSrc1Length());
  }

  SendDesc desc = i.getMsgDescriptor();
  if (desc.isReg()) {
    GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
    if (desc.reg.subRegNum != 0) { // a0.0 is implied (there's no field)
      errorT("send with reg desc must be a0.0");
    }
  } else {
    GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
    GED_ENCODE(MsgDesc, desc.imm);
  }
}


void Encoder::encodeSyncInstruction(const Instruction &inst) {
  // Set the Dst.HorStride to 1 so that "sync.bar null" can be compacted
  GED_ENCODE(DstHorzStride, 1);

  const Operand &src = inst.getSource(0);
  if (src.getKind() == Operand::Kind::IMMEDIATE) {
    encodeSrcRegFile<SourceIndex::SRC0>(GED_REG_FILE_IMM);
    encodeSrcType<SourceIndex::SRC0>(src.getType());
    encodeImmVal(src.getImmediateValue(), src.getType());
  } else {
    if (platform() <= Platform::XE_HPG) {
      encodeSrcRegFile<SourceIndex::SRC0>(GED_REG_FILE_ARF);
    } else {
      // XeHPC+ supports sync with reg32. For earlier platforms encode it to the
      // null reg anyway. If not doing so we'll encounter some weird behavior on
      // validation. Suspect it's becuase on some previous platforms' testcase
      // there are reg32 those are not valid, but IGA workaround (set it to
      // NULL) them
      if (src.isNull()) {
        encodeSrcRegFile<SourceIndex::SRC0>(GED_REG_FILE_ARF);
      } else {
        // currently only flag register is supported in sync.bar
        encodeSrcRegFile<SourceIndex::SRC0>(lowerRegFile(src.getDirRegName()));
        encodeSrcReg<SourceIndex::SRC0>(src.getDirRegName(),
                                        src.getDirRegRef().regNum);
        encodeSrcType<SourceIndex::SRC0>(src.getType());
        // must be flag register (otherwise GED will return error), encode the
        // subreg directly.
        GED_ENCODE(Src0SubRegNum,
                   SubRegToBinaryOffset(src.getDirRegRef().subRegNum,
                                        src.getDirRegName(), src.getType(),
                                        m_model.platform));
      }
    }
  }
}

// Encodes the destination of a branch: register file, register number, and
// the sub-register converted to its binary offset for the operand's type.
void Encoder::encodeBranchDestination(const Operand &dst) {
  const auto regName = dst.getDirRegName();
  const auto &regRef = dst.getDirRegRef();

  GED_ENCODE(DstRegFile, lowerRegFile(regName));
  encodeDstReg(regName, regRef.regNum);
  GED_ENCODE(DstSubRegNum,
             SubRegToBinaryOffset(regRef.subRegNum, regName, dst.getType(),
                                  m_model.platform));
}

void Encoder::encodeBasicDestination(const Instruction &inst,
                                     const Operand &dst,
                                     GED_ACCESS_MODE accessMode) {
  IGA_ASSERT(accessMode != GED_ACCESS_MODE_Align16 || m_model.supportsAlign16(),
             "Align16 not supported on this platform.");

  GED_ENCODE(DstRegFile, lowerRegFile(dst.getDirRegName()));
  switch (dst.getKind()) {
  case Operand::Kind::DIRECT:
  case Operand::Kind::MACRO:
    GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Direct);
    GED_ENCODE(DstDataType, lowerDataType(dst.getType()));
    if (inst.getOpSpec().supportsSaturation()) {
      GED_ENCODE(Saturate, lowerSaturate(dst.getDstModifier()));
    }
    // VVVVV   fallthrough  VVVVV
  default:
    break;
  }

  switch (dst.getKind()) {
  case Operand::Kind::DIRECT:
    if (accessMode == GED_ACCESS_MODE_Align16) {
      if (dst.getRegion() != Region::DST1) {
        fatalT("dst has inconvertible region for Align16 encoding");
        return;
      }
      if (isAlign16MathMacroRegisterCsrOperand(dst)) {
        // acc2.XXXX on BDW .. SKL is context save and restore
        // This is really mme0
        encodeDstReg(RegName::ARF_MME, 0);
        // on GEN8 and GEN9 all encode as acc2, but the mux varies
        // to distinguish which acc it really is.
        GED_DST_CHAN_EN chEn;
        switch (dst.getDirRegRef().regNum) {
        /// case 0: ... acc2 actually uses Align1!
        // old-style for acc2 would be:
        // mov(8) r113:ud acc2:ud  {NoMask} // acc2
        //
        // acc3-9 are Align16
        case 1:
          chEn = GED_DST_CHAN_EN_x;
          break; // mme0/acc3 -> acc2.x (0001b)
        case 2:
          chEn = GED_DST_CHAN_EN_y;
          break; // mme1/acc4 -> acc2.y (0010b)
        case 3:
          chEn = GED_DST_CHAN_EN_xy;
          break;
        case 4:
          chEn = GED_DST_CHAN_EN_z;
          break;
        case 5:
          chEn = GED_DST_CHAN_EN_xz;
          break;
        case 6:
          chEn = GED_DST_CHAN_EN_yz;
          break;
        case 7:
          chEn = GED_DST_CHAN_EN_xyzw;
          break; // mme7/acc9 -> acc2.xyzw (0111b)
        default:
          IGA_ASSERT_FALSE("unreachable");
          chEn = GED_DST_CHAN_EN_x;
        }
        GED_ENCODE(DstChanEn, chEn);
      } else {
        // normal align16 destination (this still might be a
        // CSR work around op if the src is "acc2")
        encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
        GED_ENCODE(DstChanEn, GED_DST_CHAN_EN_xyzw);
      }
      GED_ENCODE(DstSubRegNum,
                 SubRegToBinaryOffset(dst.getDirRegRef().subRegNum,
                                      dst.getDirRegName(), dst.getType(),
                                      m_model.platform));
    } else { // Align1
      encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
      GED_ENCODE(DstSubRegNum,
                 SubRegToBinaryOffset(dst.getDirRegRef().subRegNum,
                                      dst.getDirRegName(), dst.getType(),
                                      m_model.platform));
    }
    break;
  case Operand::Kind::MACRO:
    encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
    GED_ENCODE(DstMathMacroExt, lowerSpecialAcc(dst.getMathMacroExt()));
    if (accessMode == GED_ACCESS_MODE_Align1 &&
        m_model.supportsAlign16ImplicitAcc()) {
      fatalT("Align1 dst math macro unsupported on this platform.");
      return;
    }
    break;
  case Operand::Kind::INDIRECT:
    GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Indirect);
    GED_ENCODE(DstDataType, lowerDataType(dst.getType()));
    if (inst.getOpSpec().supportsSaturation()) {
      GED_ENCODE(Saturate, lowerSaturate(dst.getDstModifier()));
    }

    GED_ENCODE(DstAddrImm, dst.getIndImmAddr());
    GED_ENCODE(DstAddrSubRegNum, dst.getIndAddrReg().subRegNum);
    break;
  default:
    IGA_ASSERT_FALSE("unsupported operand kind");
    break;
  }

  if (accessMode == GED_ACCESS_MODE_Align1) {
    auto dstRgn = dst.getRegion();
    if (inst.getOpSpec().hasImplicitDstRegion(inst.isMacro())) {
      auto dstRgnImpl = inst.getOpSpec().implicitDstRegion(inst.isMacro());
      if (dstRgn != dstRgnImpl) {
        warningT("dst region should be ", ToSyntax(dstRgnImpl));
      }
    }
    GED_ENCODE(DstHorzStride, lowerRegionHorz(dstRgn.getHz()));
  }
}

// Fills the four-entry channel select `chSel` for the context save/restore
// workaround. Following IsaAsm rules, a two-component swizzle reg.ab expands
// to reg.abbb: entry 0 gets `x` and entries 1..3 get `y`.
static void createChSelForCtxSavRst(GED_SWIZZLE *chSel, GED_SWIZZLE x,
                                    GED_SWIZZLE y) {
  chSel[0] = x;
  for (int ch = 1; ch < 4; ++ch) {
    chSel[ch] = y;
  }
}

// Encodes src0 of a branch: register file, register number, and the
// sub-register converted to a binary offset using Type::D (the operand's own
// type is not consulted here).
void Encoder::encodeBranchSource(const Operand &src) {
  const auto regName = src.getDirRegName();
  const auto &regRef = src.getDirRegRef();

  encodeSrcRegFile<SourceIndex::SRC0>(lowerRegFile(regName));
  encodeSrcReg<SourceIndex::SRC0>(regName, regRef.regNum);
  encodeSrcSubRegNum<SourceIndex::SRC0>(SubRegToBinaryOffset(
      regRef.subRegNum, regName, Type::D, m_model.platform));
}

// Encodes source operand `src` (source index S) of `inst` for the given
// access mode. The fields are written in three passes over the operand kind:
//   1. register file and source modifier
//   2. addressing mode plus register / math-macro / indirect / immediate
//      fields (the data type is encoded between passes 1 and 2)
//   3. region (Align1) or vertical stride and channel select (Align16)
//
// Reports a fatal error and returns early for an unsupported operand kind
// (other than the "mov with label" src0 case) and for an Align16 region that
// cannot be translated.
template <SourceIndex S>
void Encoder::encodeBasicSource(const Instruction &inst,
                                const Operand &src,
                                GED_ACCESS_MODE accessMode) {
  // setting the reg file must precede setting the type in GED
  switch (src.getKind()) {
  case Operand::Kind::DIRECT:
  case Operand::Kind::MACRO:
  case Operand::Kind::INDIRECT:
    encodeSrcRegFile<S>(lowerRegFile(src.getDirRegName()));
    if (inst.getOpSpec().supportsSourceModifiers()) {
      encodeSrcModifier<S>(src.getSrcModifier());
    } else if (src.getSrcModifier() != SrcModifier::NONE) {
      // better be invalid in the IR if unsupported
      errorT("src", (int)S, " source modifier not supported (invalid IR)");
    }
    break;
  case Operand::Kind::IMMEDIATE:
    encodeSrcRegFile<S>(GED_REG_FILE_IMM);
    break;
  default:
    // other kinds are dealt with in the second switch below
    break;
  }

  encodeSrcType<S>(src.getType());

  switch (src.getKind()) {
  case Operand::Kind::DIRECT:
  case Operand::Kind::MACRO: {
    encodeSrcAddrMode<S>(GED_ADDR_MODE_Direct);
    if (src.getKind() == Operand::Kind::DIRECT) {
      if (isAlign16MathMacroRegisterCsrOperand(src)) {
        // BDW..SKL context save and restore of acc3...acc9
        // encode as acc2.####, ChSel will be changed in regioning code
        // recall acc2 is remapped to mme0
        // (no sub-register is encoded on this path)
        encodeSrcReg<S>(RegName::ARF_MME, 0);
      } else {
        encodeSrcReg<S>(src.getDirRegName(), src.getDirRegRef().regNum);
        auto subReg = SubRegToBinaryOffset(src.getDirRegRef().subRegNum,
                                           src.getDirRegName(), src.getType(),
                                           m_model.platform);
        encodeSrcSubRegNum<S>(subReg);
      }
    } else { // (src.getKind() == Operand::Kind::MACRO)
      encodeSrcReg<S>(src.getDirRegName(), src.getDirRegRef().regNum);
      encodeSrcMathMacroReg<S>(src.getMathMacroExt());
      if (accessMode == GED_ACCESS_MODE_Align16) {
        // vertical stride has to be halved for 8B types
        if (src.getType() == Type::DF) {
          encodeSrcRegionVert<S>(Region::Vert::VT_2);
        } else {
          encodeSrcRegionVert<S>(Region::Vert::VT_4);
        }
      }
    }
    break;
  }
  case Operand::Kind::INDIRECT:
    encodeSrcAddrMode<S>(GED_ADDR_MODE_Indirect);
    encodeSrcAddrImm<S>(src.getIndImmAddr());
    encodeSrcAddrSubRegNum<S>(src.getIndAddrReg().subRegNum);
    break;
  case Operand::Kind::IMMEDIATE:
    encodeImmVal(src.getImmediateValue(), src.getType());
    break;
  default:
    // support mov label: only src0 of a "mov with label" is accepted, and
    // only its register file (immediate) is set here
    if (static_cast<int>(S) == 0 && inst.isMovWithLabel()) {
      GED_ENCODE(Src0RegFile, GED_REG_FILE_IMM);
    } else {
      fatalT("src", (int)S,
             ": unsupported source operand kind "
             "(malformed IR)");
      return;
    }
    break;
  }

  // sets stuff found in all register accesses (not macros)
  //   - region
  switch (src.getKind()) {
  case Operand::Kind::DIRECT:
  case Operand::Kind::INDIRECT:
    if (accessMode == GED_ACCESS_MODE_Align16) {
      // r13.0<4>.xyzw is the only supported ChEn
      //      ^^^
      encodeSrcRegionVert<S>(Region::Vert::VT_4);
      // default swizzle is the identity; the CSR workaround overrides it
      GED_SWIZZLE chSel[4] = {GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_z,
                              GED_SWIZZLE_w};
      if (isAlign16MathMacroRegisterCsrOperand(src)) {
        // context save and restore workaround on GEN8 and GEN9
        // (register numbers outside 1..7 keep the identity swizzle)
        switch (src.getDirRegRef().regNum) {
        case 1: // acc2.yx = mme1 (acc3)
          createChSelForCtxSavRst(chSel, GED_SWIZZLE_y, GED_SWIZZLE_x);
          break;
        case 2: // acc2.zx = mme2 (acc4)
          createChSelForCtxSavRst(chSel, GED_SWIZZLE_z, GED_SWIZZLE_x);
          break;
        case 3: // acc2.wx = mme3 (acc5)
          createChSelForCtxSavRst(chSel, GED_SWIZZLE_w, GED_SWIZZLE_x);
          break;
        case 4: // acc2.xy = mme4 (acc6)
          createChSelForCtxSavRst(chSel, GED_SWIZZLE_x, GED_SWIZZLE_y);
          break;
        case 5: // acc2.yy = mme5 (acc7)
          createChSelForCtxSavRst(chSel, GED_SWIZZLE_y, GED_SWIZZLE_y);
          break;
        case 6: // acc2.zy = mme6 (acc8)
          createChSelForCtxSavRst(chSel, GED_SWIZZLE_z, GED_SWIZZLE_y);
          break;
        case 7: // acc2.wy = mme7 (acc9)
          createChSelForCtxSavRst(chSel, GED_SWIZZLE_w, GED_SWIZZLE_y);
          break;
        }
      } else {
        // normal Align16 operand; the region in the IR must be one of the
        // forms that can be translated to the Align16 encoding
        if (src.getRegion() != Region::SRC110 &&
            // supports legacy bits that may use <K;K,1> for "block"
            // access; this allows us to assemble/reassemble similar bits
            src.getRegion() != Region::SRC221 &&
            src.getRegion() != Region::SRC441 &&
            src.getRegion() != Region::SRC881 &&
            src.getRegion() != Region::SRCFF1) {
          fatalT("src", (int)S,
                 ": unsupported region for "
                 "translation to align16 encoding");
          return;
        }
        // TODO: we could permit SIMD4 with .x to mean broadcast read
        // of subreg 0, but I don't think any System Routine code uses
        // this.
        //
        // NOTE: technically we could convert
        //   r13.0<0>.xxxx to r13.0<0;1,0>
        //   r13.0<0>.yyyy to r13.1<0;1,0>
        //   r13.0<0>.zzzz to r13.2<0;1,0>
        //   r13.0<0>.wwww to r13.3<0;1,0>
        // Also be sure to handle stuff like:
        //   r13.4<0>.zzzz (would be r13.7<0;1,0>)
        //
        // Let's wait until we need this though.
      }
      encodeSrcChanSel<S>(chSel[0], chSel[1], chSel[2], chSel[3]);
    } else { // Align1
      // some source positions only have a reduced region encoding
      if (m_model.srcHasReducedRegion(static_cast<uint32_t>(S)))
        encodeSrcReducedRegion<S>(src.getRegion());
      else
        encodeSrcRegion<S>(src.getRegion(), true);
    }
    break;
  case Operand::Kind::MACRO:
    if (accessMode == GED_ACCESS_MODE_Align1) {
      if (m_model.srcHasReducedRegion(static_cast<uint32_t>(S)))
        encodeSrcReducedRegion<S>(src.getRegion());
      else
        encodeSrcRegion<S>(src.getRegion());
    } // else {align16 macros use the regioning bits, don't clobber them}
    break;
  default:
    break;
  }
}

// Encodes a direct send destination. From Xe onward only the register number
// is written. Earlier platforms also get the data type (defaulting to :ud
// when the operand has no valid type) and, for opcodes other than
// sends/sendsc, the destination horizontal stride. Saturation and the
// sub-register number are deliberately not encoded here.
void Encoder::encodeSendDirectDestination(const Operand &dst) {
  if (platform() < Platform::XE) {
    auto dataType = dst.getType();
    if (dataType == Type::INVALID) {
      dataType = Type::UD;
    }
    GED_ENCODE(DstDataType, lowerDataType(dataType));

    const bool isSplitSend = m_opcode == Op::SENDS || m_opcode == Op::SENDSC;
    if (!isSplitSend) {
      // not used for sends
      GED_ENCODE(DstHorzStride,
                 static_cast<uint32_t>(dst.getRegion().getHz()));
    }
  }

  GED_ENCODE(DstRegNum, dst.getDirRegRef().regNum);
}

// Encodes the send destination data type on pre-Xe platforms (nothing is
// written from Xe onward). An operand without a valid type is encoded as :ud.
void Encoder::encodeSendDestinationDataType(const Operand &dst) {
  if (platform() < Platform::XE) {
    auto dataType = dst.getType();
    if (dataType == Type::INVALID) {
      dataType = Type::UD;
    }
    GED_ENCODE(DstDataType, lowerDataType(dataType));
  }
}

// Encodes the destination of a send: addressing mode (only on models with
// unary send), register file, and then the direct or indirect register
// fields. On a unary-send model any operand kind other than direct/indirect
// is a fatal error.
void Encoder::encodeSendDestination(const Operand &dst) {
  const auto kind = dst.getKind();
  const bool isDirect = kind == Operand::Kind::DIRECT;
  const bool isIndirect = kind == Operand::Kind::INDIRECT;

  if (m_model.supportsUnarySend()) {
    if (isDirect) {
      GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Direct);
    } else if (isIndirect) {
      GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Indirect);
    } else {
      fatalT("dst: unsupported destination operand kind/addrMode "
             "(malformed IR)");
      return;
    }
  }

  GED_ENCODE(DstRegFile, lowerRegFile(dst.getDirRegName()));

  if (isDirect) {
    encodeSendDirectDestination(dst);
    return;
  }
  if (!isIndirect) {
    return;
  }

  encodeSendDestinationDataType(dst);
  const bool isSplitSend = m_opcode == Op::SENDS || m_opcode == Op::SENDSC;
  if (!isSplitSend) {
    // not used for sends
    GED_ENCODE(DstHorzStride, static_cast<uint32_t>(dst.getRegion().getHz()));
  }
  GED_ENCODE(DstAddrImm, dst.getIndImmAddr());
  GED_ENCODE(DstAddrSubRegNum, dst.getIndAddrReg().subRegNum);
}

void Encoder::encodeSendSource0(const Operand &src) {
  if (m_model.supportsUnarySend()) {
    switch (src.getKind()) {
    case Operand::Kind::DIRECT:
      GED_ENCODE(Src0AddrMode, GED_ADDR_MODE_Direct);
      break;
    case Operand::Kind::INDIRECT:
      GED_ENCODE(Src0AddrMode, GED_ADDR_MODE_Indirect);
      break;
    default:
      fatalT("src0: unsupported source operand kind/addrMode "
             "(malformed IR)");
      return;
    }
  }

  GED_REG_FILE gedRegFile = lowerRegFile(src.getDirRegName());
  GED_ENCODE(Src0RegFile, gedRegFile);

  auto t = src.getType() == Type::INVALID ? Type::UD : src.getType();

  if (src.getKind() == Operand::Kind::DIRECT) {
    if (m_model.supportsXeSend()) {
      GED_ENCODE(Src0RegNum, src.getDirRegRef().regNum);
    } else {
      GED_ENCODE(Src0DataType, lowerDataType(t));
      GED_ENCODE(Src0RegNum, src.getDirRegRef().regNum);
      GED_ENCODE(Src0SubRegNum, src.getDirRegRef().subRegNum);
    }
  } else if (src.getKind() == Operand::Kind::INDIRECT) {
    // Gather Send src0 operand
    if (src.getDirRegName() == RegName::ARF_S) {
      // Though src0 is indirect register access, its reg number and subreg
      // number can be get from "DirRegRef" since it is not a0 access
      encodeSrcReg<SourceIndex::SRC0>(src.getDirRegName(),
                                      src.getDirRegRef().regNum);
      auto subReg = src.getDirRegRef().subRegNum;
      if (subReg % 2)
        errorT("src0 subreg must be 16b aligned");
      GED_ENCODE(Src0SubRegNum, subReg);
    } else {
      // legacy send indirect src operand
      GED_ENCODE(Src0DataType, lowerDataType(t));
      GED_ENCODE(Src0AddrSubRegNum, src.getIndAddrReg().subRegNum);
      // For platform >= XeHPC, the ImmAddr is represented in Word Offset in
      // binary,
      //     platform <  XeHPC, the ImmAddr is represented in Byte Offset in
      //     binary
      // And for all platforms, the ImmAddr is represented in Byte Offset in
      // assembly syntax
      if (platform() >= Platform::XE_HPC) {
        GED_ENCODE(Src0AddrImm, src.getIndImmAddr() / 2);
      } else {
        GED_ENCODE(Src0AddrImm, src.getIndImmAddr());
      }
    }
  }
}

// The sends opCode exists on gen9+.  There is no sends opcode on pre-gen9.
// Starting from XE, send opcode can have two sources, so the sends opcode
// is not needed.
//
// Encodes src0 of a sends/sendsc instruction. Only the addressing mode and
// the register (direct) or address (indirect) fields are written:
// "...for sends/sendsc instructions Src0.SrcMod, ... and Src0.SrcType are not
// used." "Src0.RegFile[1], Src1.RegFile[1] are implicitly set to 0,
//  and Src0.RegFile[0] is implicitly set as 1 for sends/sendsc instructions."
void Encoder::encodeSendsSource0(const Operand &src) {
  const auto kind = src.getKind();

  if (kind == Operand::Kind::DIRECT) {
    GED_ENCODE(Src0AddrMode, GED_ADDR_MODE_Direct);
    GED_ENCODE(Src0RegNum, src.getDirRegRef().regNum);
    GED_ENCODE(Src0SubRegNum, src.getDirRegRef().subRegNum);
  } else if (kind == Operand::Kind::INDIRECT) {
    GED_ENCODE(Src0AddrMode, GED_ADDR_MODE_Indirect);
    // The syntax always gives ImmAddr as a byte offset. It is encoded as
    // words on XeHPC and later, and as bytes before that.
    auto addrImm = src.getIndImmAddr();
    if (platform() >= Platform::XE_HPC) {
      addrImm /= 2;
    }
    GED_ENCODE(Src0AddrImm, addrImm);
    GED_ENCODE(Src0AddrSubRegNum, src.getIndAddrReg().subRegNum);
  } else {
    fatalT("src0: unsupported source operand kind/addrMode (malformed IR)");
  }
}

void Encoder::encodeSendsSource1(const Operand &src) {
  // GED_ENCODE(Src1AddrMode, GED_ADDR_MODE_Direct);
  GED_REG_FILE gedRegFile = lowerRegFile(src.getDirRegName());
  GED_ENCODE(Src1RegFile, gedRegFile);
  if (gedRegFile == GED_REG_FILE_GRF)
    GED_ENCODE(Src1RegNum, src.getDirRegRef().regNum);
}

// Encodes the destination of a sends/sendsc instruction as a direct operand:
// addressing mode, register file, data type, register number and
// sub-register. Saturation and horizontal stride are not encoded.
void Encoder::encodeSendsDestination(const Operand &dst) {
  GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Direct);
  GED_ENCODE(DstRegFile, lowerRegFile(dst.getDirRegName()));

  // send types use :ud where possible
  auto dataType = dst.getType();
  if (dataType == Type::INVALID) {
    dataType = Type::UD;
  }
  GED_ENCODE(DstDataType, lowerDataType(dataType));

  const auto &regRef = dst.getDirRegRef();
  GED_ENCODE(DstRegNum, regRef.regNum);
  // TODO: set correct regType
  // (the offset below is computed from the operand's own type, not from the
  // :ud fallback used for the data type field above)
  GED_ENCODE(DstSubRegNum,
             SubRegToBinaryOffset(regRef.subRegNum, RegName::GRF_R,
                                  dst.getType(), m_model.platform));
}

// Encodes source S of ternary instruction `inst` in the Align16 form.
//
// Steps: direct addressing mode, optional source modifier, data type, and
// then either
//   * (non-macro) replicate control / channel select derived from the IR
//     region, followed by the GRF register and sub-register, or
//   * (macro) the GRF register and the math macro register.
//
// Reports a fatal error and returns early for a non-floating source type
// paired with a floating src0 (GEN8LP and later), and for a region with no
// Align16 equivalent.
template <SourceIndex S>
void Encoder::encodeTernarySourceAlign16(const Instruction &inst) {
  // PreCNL Align16
  // GRF-only
  encodeSrcAddrMode<S>(GED_ADDR_MODE_Direct);

  const Operand &src = inst.getSource(S);

  if (inst.getOpSpec().supportsSourceModifiers()) {
    encodeSrcModifier<S>(src.getSrcModifier());
  }

  // set the data type
  GED_DATA_TYPE gedType = lowerDataType(src.getType());
  if (S == SourceIndex::SRC0) {
    GED_ENCODE(SrcDataType, gedType);
  } else {
    // src1/src2: a type is only encoded on GEN8LP+ when src0 is :f or :hf,
    // and then this source must be :f or :hf as well
    const Operand &src0 = inst.getSource(SourceIndex::SRC0);
    bool src0IsFloating =
        src0.getType() == Type::F || src0.getType() == Type::HF;
    if (platform() >= Platform::GEN8LP && src0IsFloating) {
      bool srcNIsFloating =
          src.getType() == Type::F || src.getType() == Type::HF;
      if (src0IsFloating && srcNIsFloating) {
        encodeSrcType<S>(src.getType());
      } else {
        fatalT("src", (int)S,
               ": mixed types require :f and :hf "
               "(or vice versa)");
        return;
      }
    }
  }

  if (!inst.isMacro()) {
    const Region &rgn = src.getRegion();
    const RegRef &reg = src.getDirRegRef();
    // Adjusting the subregister when going from the Align1 to the Align16
    // representation: in Align16 the subregister is always 16-byte aligned,
    // but the swizzle can be used to reach a non-aligned subregister.
    uint16_t subRegNumber = reg.subRegNum;
    // mad (8) r46.0.xyzw:df r46.0.xyzw:df r50.0.xyzw:df r48.0.xyzw:df
    //     {Align16, Q1}
    // mad (2) r5.0.xy:df r5.0.xyxy:df r92.2.xyxy:df r93.0.xyxy:df
    //     {Align16, Q1, NoMask} // BDW,SKL
    if (S != SourceIndex::SRC2) {
      // src0/src1
      if (rgn == Region::SRC8X1 || rgn == Region::SRC4X1 ||
          rgn == Region::SRC2X1) {
        // packed access: no replication, identity swizzle
        encodeSrcRepCtrl<S>(GED_REP_CTRL_NoRep);
        encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_z,
                            GED_SWIZZLE_w);
      } else if (rgn == Region::SRC0X0) {
        // scalar (broadcast) access
        if (src.getType() == Type::DF) {
          // :df picks the element via .xyxy (even subregister) or .zwzw
          // (odd subregister, which is then rounded down by one)
          if (reg.subRegNum % 2 == 0) {
            encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_x,
                                GED_SWIZZLE_y);
          } else {
            encodeSrcChanSel<S>(GED_SWIZZLE_z, GED_SWIZZLE_w, GED_SWIZZLE_z,
                                GED_SWIZZLE_w);
            subRegNumber -= 1;
          }
        } else {
          encodeSrcRepCtrl<S>(GED_REP_CTRL_Rep);
        }
      } else {
        fatalT("src", (int)S, ": unsupported region for Align16 encoding");
        return;
      }
    } else {
      // src2 is matched against a different set of region constants
      if (rgn == Region::SRCXX1) {
        encodeSrcRepCtrl<S>(GED_REP_CTRL_NoRep);
        encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_z,
                            GED_SWIZZLE_w);
      } else if (rgn == Region::SRCXX0) {
        if (src.getType() == Type::DF) {
          if (src.getDirRegRef().subRegNum % 2 == 0) {
            encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_x,
                                GED_SWIZZLE_y);
          } else {
            encodeSrcChanSel<S>(GED_SWIZZLE_z, GED_SWIZZLE_w, GED_SWIZZLE_z,
                                GED_SWIZZLE_w);
            subRegNumber -= 1;
          }
        } else {
          encodeSrcRepCtrl<S>(GED_REP_CTRL_Rep);
        }
      } else if (rgn == Region::SRC0X0 && src.getType() == Type::DF) {
        // :df with the SRC0X0 region always uses .xyxy here; the
        // subregister is left unchanged
        encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_x,
                            GED_SWIZZLE_y);
      } else {
        fatalT("src", (int)S, ": unsupported region for Align16 encoding");
        return;
      }
    }
    // the register is always written as a GRF; the subregister uses the
    // (possibly adjusted) number computed above
    uint32_t regNum = reg.regNum;
    encodeSrcReg<S>(RegName::GRF_R, (uint16_t)regNum);
    auto subReg = SubRegToBinaryOffset(subRegNumber, src.getDirRegName(),
                                       src.getType(), m_model.platform);
    encodeSrcSubRegNum<S>(subReg);
  } else {
    // implicit operand accumulator
    // e.g. madm (4) ... -r14.acc3
    encodeSrcReg<S>(RegName::GRF_R, src.getDirRegRef().regNum);
    encodeSrcMathMacroReg<S>(src.getMathMacroExt());
  }
}

void Encoder::encodeTernaryDestinationAlign16(const Instruction &inst) {
  const Operand &dst = inst.getDestination();
  if (inst.getOpSpec().supportsSaturation()) {
    GED_ENCODE(Saturate, lowerSaturate(dst.getDstModifier()));
  }
  GED_ENCODE(DstDataType, lowerDataType(dst.getType()));
  if (dst.getDirRegName() != RegName::GRF_R) {
    fatalT("align16 ternary dst must be to GRF");
    return;
  }

  // register / info (must be GRF)
  GED_ENCODE(DstRegFile, lowerRegFile(dst.getDirRegName()));
  uint32_t regNum = dst.getDirRegRef().regNum;
  GED_ENCODE(DstRegNum, regNum);
  if (inst.isMacro()) {
    // macro only
    GED_DST_CHAN_EN chanEn = mathMacroRegToChEn(dst.getMathMacroExt());
    GED_ENCODE(DstChanEn, chanEn);
  } else {
    // Align16 instruction (we must convert from Align1)
    //
    // As long as the Align1 sequences are packed (.xyzw), this is
    // straightforward.  However, "scalar" (braoadcast) sequences are
    // a bit harder as we must carefully choose the ChEn based on the
    // subregister that would be used in Align1
    // (See also Decoder::decodeDestinationTernaryAlign16)
    GED_DST_CHAN_EN chanEn = GED_DST_CHAN_EN_xyzw;
    auto reg = dst.getDirRegRef();
    if (inst.getExecSize() == ExecSize::SIMD1) {
      // SIMD1 MAD is not allowed, so MDF (and IGC) are generating use
      // SIMD4 and SIMD2 with specific channel masks to selectively
      // enable just the bottom channel.
      if (dst.getType() == Type::DF) {
        // For 64-bit types we use a mad (2) ...
        // Note, only :df is needed since :q and :uq are not supported
        //
        // e.g. mad (2)  r5.0.xy:df     ... {Align16, Q1, NoMask} //
        if (dst.getDirRegRef().subRegNum % 2 == 0) {
          chanEn = GED_DST_CHAN_EN_xy;
        } else {
          // e.g. mad (1) r5.1<1>:df
          //  encodes as
          //      mad (2) r5.0.zw:df
          //           ^ SIMD2 and .zw (~= .1)
          chanEn = GED_DST_CHAN_EN_zw;
          reg.subRegNum -= 1;
        }
      } else {
        // 32-bit or 16-bit type (:hf).  We use a SIMD4
        //
        // one channel enabled. E.g. we'll parse
        //   mad (1|M0)  r53.6<1>:f  ...
        // and encode it as
        //   mad (4)     r53.4.z:f
        //        ^ SIMD4    ^^^ aligned subreg .4.z == subreg .6:f Align1
        switch (reg.subRegNum % 4) {
        case 0:
          chanEn = GED_DST_CHAN_EN_x;
          break;
        case 1:
          chanEn = GED_DST_CHAN_EN_y;
          break;
        case 2:
          chanEn = GED_DST_CHAN_EN_z;
          break;
        case 3:
          chanEn = GED_DST_CHAN_EN_w;
          break;
        }
        // align the subregister
        reg.subRegNum -= (reg.subRegNum % 4);
      }
    }
    GED_ENCODE(DstChanEn, chanEn);
    GED_ENCODE(DstSubRegNum,
               SubRegToBinaryOffset(reg.subRegNum, dst.getDirRegName(),
                                    dst.getType(), m_model.platform));
  }
}

// Sets the destination register number field for an ARF or GRF register.
void Encoder::encodeDstReg(RegName regName, uint16_t regNum) {
  // an operand index of -1 makes translateRegNum label diagnostics as "dst"
  const uint32_t encodedRegNum = translateRegNum(-1, regName, regNum);
  GED_ENCODE(DstRegNum, encodedRegNum);
}

// Sets the immediate field from an IR immediate value of the given type.
void Encoder::encodeImmVal(const ImmVal &val, Type type) {
  // typeConvesionHelper yields the value that is handed to GED
  const auto immBits = typeConvesionHelper(val, type);
  GED_ENCODE(Imm, immBits);
}

// Sets the replicate-control field of the source selected by S.
// Any index other than SRC0 or SRC1 is encoded as src2.
template <SourceIndex S> void Encoder::encodeSrcRepCtrl(GED_REP_CTRL rep) {
  switch (S) {
  case SourceIndex::SRC0:
    GED_ENCODE(Src0RepCtrl, rep);
    break;
  case SourceIndex::SRC1:
    GED_ENCODE(Src1RepCtrl, rep);
    break;
  default:
    GED_ENCODE(Src2RepCtrl, rep);
    break;
  }
}

// Sets the channel-select (swizzle) field of the source selected by S from
// the four per-channel swizzles.  Any index other than SRC0 or SRC1 is
// encoded as src2.
template <SourceIndex S>
void Encoder::encodeSrcChanSel(GED_SWIZZLE chSelX, GED_SWIZZLE chSelY,
                               GED_SWIZZLE chSelZ, GED_SWIZZLE chSelW) {
  // pack the four swizzles into the single value GED expects
  const uint32_t packedSwizzle = createChanSel(chSelX, chSelY, chSelZ, chSelW);
  switch (S) {
  case SourceIndex::SRC0:
    GED_ENCODE(Src0ChanSel, packedSwizzle);
    break;
  case SourceIndex::SRC1:
    GED_ENCODE(Src1ChanSel, packedSwizzle);
    break;
  default:
    GED_ENCODE(Src2ChanSel, packedSwizzle);
    break;
  }
}

uint32_t Encoder::translateRegNum(int opIx, RegName regName, uint16_t regNum) {
  const char *whichOp = opIx == 0   ? "src0"
                        : opIx == 1 ? "src1"
                        : opIx == 2 ? "src2"
                                    : "dst";

  const RegInfo *ri = m_model.lookupRegInfoByRegName(regName);
  if (ri == nullptr) {
    errorT(whichOp, ": invalid register name for this platform");
  } else if (!ri->isRegNumberValid((int)regNum)) {
    errorT(whichOp, ": ", ri->syntax, regNum, " number out of range");
  } else if (regName != RegName::GRF_R) {
    return ri->encodeARFRegNum((int)regNum);
  }
  return regNum; // widen for GED
}

// Maps a math macro register to its numeric encoding: MME0..MME7 map to
// 0..7 and NOMME maps to 8 (1000b).
//
// For any other value a fatal error is reported and 8 (the NOMME encoding)
// is returned.  A negative src names the destination in the diagnostic;
// otherwise src is the source operand index.
uint32_t Encoder::mathMacroRegToBits(int src, MathMacroExt implAcc) {
  switch (implAcc) {
  case MathMacroExt::MME0:
    return 0; // 0000b
  case MathMacroExt::MME1:
    return 1;
  case MathMacroExt::MME2:
    return 2;
  case MathMacroExt::MME3:
    return 3;
  case MathMacroExt::MME4:
    return 4;
  case MathMacroExt::MME5:
    return 5;
  case MathMacroExt::MME6:
    return 6;
  case MathMacroExt::MME7:
    return 7;
  case MathMacroExt::NOMME:
    return 8; // 1000b
  default:
    break;
  }

  // not a valid math macro register: report it against the right operand
  if (src >= 0) {
    fatalT("src", src, " operand has invalid math macro register");
  } else {
    fatalT("dst operand has invalid math macro register");
  }
  return 8; // NOACC
}
// Maps a math macro register to the destination channel-enable value used
// to carry it in Align16 encodings: MME0..MME7 become the channel masks
// None, x, y, xy, z, xz, yz, xyz and NOMME becomes w.
//
// For any other value a fatal error is reported and the NOMME mask (w) is
// returned.
GED_DST_CHAN_EN Encoder::mathMacroRegToChEn(MathMacroExt implAcc) {
  switch (implAcc) {
  case MathMacroExt::MME0:
    return GED_DST_CHAN_EN_None; // 0000b
  case MathMacroExt::MME1:
    return GED_DST_CHAN_EN_x;
  case MathMacroExt::MME2:
    return GED_DST_CHAN_EN_y;
  case MathMacroExt::MME3:
    return GED_DST_CHAN_EN_xy;
  case MathMacroExt::MME4:
    return GED_DST_CHAN_EN_z; // 0100b
  case MathMacroExt::MME5:
    return GED_DST_CHAN_EN_xz;
  case MathMacroExt::MME6:
    return GED_DST_CHAN_EN_yz;
  case MathMacroExt::MME7:
    return GED_DST_CHAN_EN_xyz;
  case MathMacroExt::NOMME:
    return GED_DST_CHAN_EN_w; // 1000b
  default:
    break;
  }
  fatalT("operand has invalid math macro register");
  return GED_DST_CHAN_EN_w; // NOACC
}

// Encodes the NoPreempt thread control if the instruction requests it.
// On platforms without NoPreempt support the option is dropped with a
// warning.
void Encoder::encodeOptionsThreadControl(const Instruction &inst) {
  if (!inst.hasInstOpt(InstOpt::NOPREEMPT)) {
    return; // nothing requested
  }
  if (!m_model.supportsNoPreempt()) {
    warningT("NoPreempt not supported on this platform (dropping)");
    return;
  }
  GED_ENCODE(ThreadCtrl, GED_THREAD_CTRL_NoPreempt);
}

// Encodes the instruction options: debug control, dependency control,
// thread control, NoSrcDepSet and (on XE and later) the SWSB field.
void Encoder::encodeOptions(const Instruction &inst) {
  // debug control is always written
  if (inst.hasInstOpt(InstOpt::BREAKPOINT)) {
    GED_ENCODE(DebugCtrl, GED_DEBUG_CTRL_Breakpoint);
  } else {
    GED_ENCODE(DebugCtrl, GED_DEBUG_CTRL_Normal);
  }

  const auto &opSpec = inst.getOpSpec();
  // the "Normal" values below are only written for non-send, non-nop ops
  const bool takesNormalCtrl =
      !opSpec.isAnySendFormat() && inst.getOp() != Op::NOP;

  // dependency control
  if (opSpec.supportsDepCtrl()) {
    const bool noDDChk = inst.hasInstOpt(InstOpt::NODDCHK);
    const bool noDDClr = inst.hasInstOpt(InstOpt::NODDCLR);
    if (noDDChk && noDDClr) {
      GED_ENCODE(DepCtrl, GED_DEP_CTRL_NoDDClr_NoDDChk);
    } else if (noDDChk) {
      GED_ENCODE(DepCtrl, GED_DEP_CTRL_NoDDChk);
    } else if (noDDClr) {
      GED_ENCODE(DepCtrl, GED_DEP_CTRL_NoDDClr);
    } else if (takesNormalCtrl) {
      GED_ENCODE(DepCtrl, GED_DEP_CTRL_Normal);
    }
  }

  // thread control: Atomic, then Switch, then NoPreempt (via the helper)
  const bool hasAtomic = inst.hasInstOpt(InstOpt::ATOMIC);
  const bool hasSwitch = inst.hasInstOpt(InstOpt::SWITCH);
  const bool hasNoPreempt = inst.hasInstOpt(InstOpt::NOPREEMPT);

  if (hasAtomic) {
    GED_ENCODE(ThreadCtrl, GED_THREAD_CTRL_Atomic);
  }

  if (hasSwitch && m_model.supportsHwDeps()) {
    if (inst.getOp() != Op::NOP) {
      GED_ENCODE(ThreadCtrl, GED_THREAD_CTRL_Switch);
    } else {
      warningT("nop doesn't support Switch option (dropping)");
    }
  }
  encodeOptionsThreadControl(inst);

  // with none of the thread-control options present, write Normal
  const bool anyThreadCtrlOpt = hasAtomic || hasSwitch || hasNoPreempt;
  if (!anyThreadCtrlOpt && takesNormalCtrl) {
    GED_ENCODE(ThreadCtrl, GED_THREAD_CTRL_Normal);
  }

  // NoSrcDepSet: explicit option wins; otherwise send formats get Normal
  // on platforms that support the field
  if (inst.hasInstOpt(InstOpt::NOSRCDEPSET)) {
    GED_ENCODE(NoSrcDepSet, GED_NO_SRC_DEP_SET_NoSrcDepSet);
  } else if (opSpec.isAnySendFormat() && m_model.supportNoSrcDepSet()) {
    GED_ENCODE(NoSrcDepSet, GED_NO_SRC_DEP_SET_Normal);
  }

  // SWSB is encoded on XE and later, except for the illegal op
  if (platform() >= Platform::XE && m_opcode != Op::ILLEGAL) {
    const SWSB::InstType swsbInstType =
        inst.getSWSBInstType(m_opts.swsbEncodeMode);
    const uint32_t swsbBits =
        inst.getSWSB().encode(m_opts.swsbEncodeMode, swsbInstType);
    IGA_ASSERT(inst.getSWSB().verify(m_opts.swsbEncodeMode, swsbInstType),
               "INTERNAL ERROR: invalid SWSB (parser/IR-creator should have "
               "prevented this)");

    GED_ENCODE(SWSB, swsbBits);
  }
}

// Resolves the jump targets of every instruction recorded in m_needToPatch
// and re-encodes those instructions.
//
// For each entry this computes the JIP (and, for two-source forms, the UIP)
// from the target block's offset or from an immediate operand, writes the
// value into the entry's saved GED instruction, and re-encodes that
// instruction into the entry's bits.  A "mov" with a label source gets its
// immediate patched instead of JIP.  Unresolvable labels and GED encoding
// failures are reported as fatal errors at the instruction's location.
void Encoder::patchJumpOffsets() {
  for (JumpPatch &jp : m_needToPatch) {
    const Instruction *inst = jp.inst;
    IGA_ASSERT(inst->getOpSpec().isBranching() || inst->isMovWithLabel(),
               "patching non-control-flow/non-mov instruction");

    // on some platforms jmpi is post-increment
    uint32_t jmpiExtraOffset = 0;
    bool isPostIncrementJmpi =
        inst->getOp() == Op::JMPI && !m_model.supportsSimplifiedBranches();
    if (isPostIncrementJmpi) {
      // jmpi is relative to the incremented PC, hence we must add
      // the size of the instruction here.  jmpi probably will never
      // compact, but we'll be careful here
      jmpiExtraOffset = inst->hasInstOpt(InstOpt::COMPACTED) ? 8 : 16;
      IGA_ASSERT(inst->getSource(0).getKind() == Operand::Kind::LABEL,
                 "patching non label op");
      // skip registers
    }

    // calla and mov use an absolute offset, so their base PC is 0;
    // everything else is relative to the instruction's own encoded PC
    uint32_t encodePC =
        (inst->getOpSpec().isJipAbsolute()) || (inst->getOp() == Op::MOV)
            ? 0
            : getEncodedPC(inst);

    uint32_t jumpPC = 0;
    const Block *jipBlk = inst->getJIP();
    if (jipBlk == nullptr) {
      // immediate offset: we have to treat this as a relative offset
      // (adding encodePC here cancels the subtraction below)
      jumpPC = inst->getSource(0).getImmediateValue().s32 + encodePC;
    } else if (!getBlockOffset(jipBlk, jumpPC)) {
      // For call, its target symbol may not be resolvable until in the
      // link stage when other kernels are available.
      if (inst->getOp() != Op::CALL && inst->getOp() != Op::CALLA) {
        fatalAtT(inst->getLoc(), "jip label invalid");
      }
    }

    // offset from the base PC, adjusted for post-increment jmpi
    int32_t jip = jumpPC - encodePC - jmpiExtraOffset;
    // JIP and UIP are in QWORDS for most ops on PreBDW
    int32_t pcUnscale = arePcsInQWords(inst->getOpSpec()) ? 8 : 1;

    if (inst->isMovWithLabel()) {
      // encode mov label (the offset is written unscaled as the immediate)
      GED_DATA_TYPE src0_ty = lowerDataType(inst->getSource(0).getType());
      GED_ENCODE_TO(Src0DataType, src0_ty, &jp.gedInst);
      GED_ENCODE_TO(Imm, jip, &jp.gedInst);
    } else {
      // encode other branch instructions
      GED_ENCODE_TO(JIP, jip / pcUnscale, &jp.gedInst);
    }

    // two-source forms also carry a UIP; brc only when its src1 is an
    // immediate
    if (inst->getSourceCount() == 2 &&
        (inst->getOp() != Op::BRC || inst->getSource(1).isImm())) {
      // No need to set src1 regFile and type,
      // it will be over written by UIP
      const Block *uipBlk = inst->getUIP();
      if (uipBlk == nullptr) {
        // immediate offset, made relative using the base PC computed above
        jumpPC = inst->getSource(1).getImmediateValue().s32 + encodePC;
      } else if (!getBlockOffset(uipBlk, jumpPC)) {
        fatalAtT(inst->getLoc(), "uip label invalid");
      }
      // UIP is always taken relative to the instruction's encoded PC
      encodePC = getEncodedPC(inst);
      int32_t uip = jumpPC - encodePC;
      GED_ENCODE_TO(UIP, uip / pcUnscale, &jp.gedInst);
    }

    // re-encode branch
    START_GED_TIMER();
    GED_RETURN_VALUE status = GED_EncodeIns(&jp.gedInst,
                                            inst->hasInstOpt(InstOpt::COMPACTED)
                                                ? GED_INS_TYPE_COMPACT
                                                : GED_INS_TYPE_NATIVE,
                                            jp.bits);
    STOP_GED_TIMER();
    if (status != GED_RETURN_VALUE_SUCCESS) {
      fatalAtT(inst->getLoc(),
               "GED_EncodeIns failed: ", gedReturnValueToString(status));
    }
  }
}

// Returns true if this op's jump offsets are expressed in QWORDs rather
// than bytes.  That is the case only on platforms before GEN8, and there
// for every op except jmpi, call and calla.
bool Encoder::arePcsInQWords(const OpSpec &os) const {
  if (platform() >= Platform::GEN8) {
    return false; // GEN8 and later: always bytes
  }
  switch (os.op) {
  case Op::JMPI:
  case Op::CALL:
  case Op::CALLA:
    return false;
  default:
    return true;
  }
}

// Returns true if the instruction is a call/calla that must use the
// <2;2,1> region on src0.
// [call]: "Restriction: The src0 regioning control must be <2;2,1>"
// [calla]: "Restriction: The src0 regioning control must be <2;2,1>"
bool Encoder::callNeedsSrc0Region221(const Instruction &inst) const {
  const auto op = inst.getOp();
  if (op == Op::CALL) {
    // call: platforms before GEN8, or exactly GEN9
    const auto p = platform();
    return p < Platform::GEN8 || p == Platform::GEN9;
  }
  if (op == Op::CALLA) {
    // calla: everything up to and including GEN10
    return platform() <= Platform::GEN10;
  }
  return false;
}

// Returns true only for a call instruction on GEN11.
// NOTE(review): by name this selects a <2;4,1> src0 region - confirm
// against the caller.
bool Encoder::callNeedsSrc0Region241(const Instruction &inst) const {
  if (inst.getOp() != Op::CALL) {
    return false;
  }
  return platform() == Platform::GEN11;
}

// Sets the vertical stride of a ternary source region.
// Ternary Align1 has vertical-stride bits for src0 and src1 only, so any
// index other than SRC0 is encoded as src1 (S must not be SRC2).
void Encoder::encodeTernarySrcRegionVert(SourceIndex S, Region::Vert v) {
  const auto vertStride = lowerRegionVert(v);
  if (S != SourceIndex::SRC0) {
    GED_ENCODE(Src1VertStride, vertStride);
    return;
  }
  GED_ENCODE(Src0VertStride, vertStride);
}

// Hook for patching encoded bits in cases where GED ignores a field or
// refuses to let us set it.  The body is intentionally empty and should
// stay that way unless a GED fix is in flight; both parameters are
// currently unused.
void Encoder::applyGedWorkarounds(const Kernel &, size_t) {
  // If a workaround is ever needed, consider setting the bits directly via
  // GED_RETURN_VALUE GED_SetRawBits(ged_ins_t* ins, uint8_t low, uint8_t high,
  // const uint64_t value)
}
